// SPDX-License-Identifier: GPL-2.0-only
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>
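
/* tcp_clamp_rto_to_user_timeout() caps the next RTO so the retransmit
 * timer never fires later than the remaining TCP_USER_TIMEOUT budget:
 * with no user timeout set it returns icsk_rto unchanged, and once the
 * budget is exhausted it returns 1 jiffy so the timer fires at once.
 */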
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 elapsed, start_ts;
	s32 remaining;

	start_ts = tcp_sk(sk)->retrans_stamp;
	if (!icsk->icsk_user_timeout)
		return icsk->icsk_rto;
	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
	remaining = icsk->icsk_user_timeout - elapsed;
	if (remaining <= 0)
		return 1; /* user timeout has passed; fire ASAP */

	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}

/**
 *  tcp_write_err() - close socket and save error info
 *  @sk:  The socket the error has appeared on.
 *
 *  Returns: Nothing (void)
 */
static void tcp_write_err(struct sock *sk)
{
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	tcp_write_queue_purge(sk);
	tcp_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}

/**
 *  tcp_out_of_resources() - Close socket if out of resources
 *  @sk:        pointer to current socket
 *  @do_reset:  send a last packet with reset flag
 *
 *  Do not allow orphaned sockets to eat all our resources.
 *  This is a direct violation of the TCP specs, but it is required
 *  to prevent DoS attacks. It is called when a retransmission timeout
 *  or zero probe timeout occurs on an orphaned socket.
 *
 *  Also close if our net namespace is exiting; in that case there is no
 *  hope of ever communicating again since all netns interfaces are already
 *  down (or about to be down), and we need to release our dst references,
 *  which have been moved to the netns loopback interface, so the namespace
 *  can finish exiting.  This condition is only possible if we are a kernel
 *  socket, as those do not hold references to the namespace.
 *
 *  The criteria are still not confirmed experimentally and may change.
 *  We kill the socket if:
 *  1. The number of orphaned sockets exceeds an administratively
 *     configured limit.
 *  2. We are under strong memory pressure.
 *  3. Our net namespace is exiting.
 */
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int shift = 0;

	/* If the peer does not open the window for a long time, or did not
	 * transmit anything for a long time, penalize it. */
	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		shift++;

	/* If some dubious ICMP arrived, penalize even more. */
	if (sk->sk_err_soft)
		shift++;

	if (tcp_check_oom(sk, shift)) {
		/* Catch exceptional cases, when connection requires reset.
		 *      1. Last segment was sent recently. */
		if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /*  2. Window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = true;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
		return 1;
	}

	if (!check_net(sock_net(sk))) {
		/* Not possible to send reset; just close */
		tcp_done(sk);
		return 1;
	}

	return 0;
}

/**
 *  tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
 *  @sk:    Pointer to the current socket.
 *  @alive: bool, socket alive state
 */
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
	int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (sk->sk_err_soft && !alive)
		retries = 0;

	/* However, if socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with minimal
	 * RTO of 200msec. */
	if (retries == 0 && alive)
		retries = 8;
	return retries;
}
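
/* tcp_mtu_probing() reacts to a suspected PMTU black hole: it turns MTU
 * probing on if it was disabled, otherwise it roughly halves the MSS
 * implied by the current search_low, clamped by the tcp_base_mss,
 * tcp_mtu_probe_floor and tcp_min_snd_mss sysctls, then re-syncs the
 * socket's MSS.
 */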
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
	const struct net *net = sock_net(sk);
	int mss;

	/* Black hole detection */
	if (!net->ipv4.sysctl_tcp_mtu_probing)
		return;

	if (!icsk->icsk_mtup.enabled) {
		icsk->icsk_mtup.enabled = 1;
		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
	} else {
		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
		mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
	}
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
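
/* tcp_model_timeout() returns (in msecs) the worst-case time consumed by
 * @boundary exponentially backed-off retransmissions that start from
 * @rto_base and are clamped per attempt at TCP_RTO_MAX.  With the
 * defaults TCP_RTO_MIN = 200 ms and TCP_RTO_MAX = 120 s, a boundary of
 * 15 (the default tcp_retries2) models (2^10 - 1) * 200 ms + 6 * 120 s,
 * i.e. roughly 924.6 seconds before the write times out.
 */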
static unsigned int tcp_model_timeout(struct sock *sk,
				      unsigned int boundary,
				      unsigned int rto_base)
{
	unsigned int linear_backoff_thresh, timeout;

	linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
	if (boundary <= linear_backoff_thresh)
		timeout = ((2 << boundary) - 1) * rto_base;
	else
		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
			(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
	return jiffies_to_msecs(timeout);
}

/**
 *  retransmits_timed_out() - returns true if this connection has timed out
 *  @sk:       The current socket
 *  @boundary: max number of retransmissions
 *  @timeout:  A custom timeout value.
 *             If set to 0, the default timeout is calculated from
 *             TCP_RTO_MIN and the number of unsuccessful retransmits.
 *
 * The default "timeout" value this function can calculate and use
 * is equivalent to the timeout of a TCP connection
 * after "boundary" unsuccessful, exponentially backed-off
 * retransmissions with an initial RTO of TCP_RTO_MIN.
 */
static bool retransmits_timed_out(struct sock *sk,
				  unsigned int boundary,
				  unsigned int timeout)
{
	unsigned int start_ts;

	if (!inet_csk(sk)->icsk_retransmits)
		return false;

	start_ts = tcp_sk(sk)->retrans_stamp;
	if (likely(timeout == 0)) {
		unsigned int rto_base = TCP_RTO_MIN;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			rto_base = tcp_timeout_init(sk);
		timeout = tcp_model_timeout(sk, boundary, rto_base);
	}

	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}

/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	bool expired = false, do_reset;
	int retry_until;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		if (icsk->icsk_retransmits) {
			dst_negative_advice(sk);
		} else {
			sk_rethink_txhash(sk);
		}
		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
		expired = icsk->icsk_retransmits >= retry_until;
	} else {
		if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
			/* Black hole detection */
			tcp_mtu_probing(icsk, sk);

			dst_negative_advice(sk);
		} else {
			sk_rethink_txhash(sk);
		}
		retry_until = net->ipv4.sysctl_tcp_retries2;
		if (sock_flag(sk, SOCK_DEAD)) {
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			retry_until = tcp_orphan_retries(sk, alive);
			do_reset = alive ||
				   !retransmits_timed_out(sk, retry_until, 0);

			if (tcp_out_of_resources(sk, do_reset))
				return 1;
		}
	}
	if (!expired)
		expired = retransmits_timed_out(sk, retry_until,
						icsk->icsk_user_timeout);
	tcp_fastopen_active_detect_blackhole(sk, expired);

	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
				  icsk->icsk_retransmits,
				  icsk->icsk_rto, (int)expired);

	if (expired) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}

	return 0;
}

/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk_mem_reclaim_partial(sk);

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;

	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		goto out;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!inet_csk_in_pingpong_mode(sk)) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			inet_csk_exit_pingpong_mode(sk);
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		}
		tcp_mstamp_refresh(tcp_sk(sk));
		tcp_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}

out:
	if (tcp_under_memory_pressure(sk))
		sk_mem_reclaim(sk);
}

/**
 *  tcp_delack_timer() - The TCP delayed ACK timeout handler
 *  @t:  Pointer to the timer. (The socket is recovered via from_timer().)
 *
 *  This function gets (indirectly) called when the kernel timer for a TCP
 *  packet of this socket expires. Calls tcp_delack_timer_handler() to do
 *  the actual work.
 *
 *  Returns: Nothing (void)
 */
static void tcp_delack_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_delack_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_delack_timer_handler(sk);
	} else {
		icsk->icsk_ack.blocked = 1;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}
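
/* tcp_probe_timer() handles the zero-window probe (persist) timer: it
 * keeps probing a peer that advertises a zero window, and aborts the
 * connection once the orphan/TCP_USER_TIMEOUT limits computed below are
 * exceeded.
 */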
static void tcp_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb = tcp_send_head(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;

	if (tp->packets_out || !skb) {
		icsk->icsk_probes_out = 0;
		return;
	}

	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
	 * long as the receiver continues to respond probes. We support this by
	 * default and reset icsk_probes_out with incoming ACKs. But if the
	 * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
	 * kill the socket when the retry count and the time exceed the
	 * corresponding system limit. We also implement similar policy when
	 * we use RTO to probe window in tcp_retransmit_timer().
	 */
	if (icsk->icsk_user_timeout) {
		u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
						tcp_probe0_base(sk));

		if (elapsed >= icsk->icsk_user_timeout)
			goto abort;
	}

	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
	if (sock_flag(sk, SOCK_DEAD)) {
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_orphan_retries(sk, alive);
		if (!alive && icsk->icsk_backoff >= max_probes)
			goto abort;
		if (tcp_out_of_resources(sk, true))
			return;
	}

	if (icsk->icsk_probes_out >= max_probes) {
abort:		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}

/*
 *	Timer for Fast Open socket to retransmit SYNACK. Note that the
 *	sk here is the child socket, not the parent (listener) socket.
 */
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int max_retries = icsk->icsk_syn_retries ? :
	    sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
	struct tcp_sock *tp = tcp_sk(sk);

	req->rsk_ops->syn_ack_timeout(req);

	if (req->num_timeout >= max_retries) {
		tcp_write_err(sk);
		return;
	}
	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
	if (icsk->icsk_retransmits == 1)
		tcp_enter_loss(sk);
	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
	 * returned from rtx_syn_ack() to make it more persistent like
	 * regular retransmit because if the child socket has been accepted
	 * it's not good to give up too easily.
	 */
	inet_rtx_syn_ack(sk, req);
	req->num_timeout++;
	icsk->icsk_retransmits++;
	if (!tp->retrans_stamp)
		tp->retrans_stamp = tcp_time_stamp(tp);
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
}

/**
 *  tcp_retransmit_timer() - The TCP retransmit timeout handler
 *  @sk:  Pointer to the current socket.
 *
 *  This function gets called when the kernel timer for a TCP packet
 *  of this socket expires.
 *
 *  It handles retransmission, timer adjustment and other necessary measures.
 *
 *  Returns: Nothing (void)
 */
void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock *req;

	req = rcu_dereference_protected(tp->fastopen_rsk,
					lockdep_sock_is_held(sk));
	if (req) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
			     sk->sk_state != TCP_FIN_WAIT1);
		tcp_fastopen_synack_timer(sk, req);
		/* Before we receive ACK to our SYN-ACK don't retransmit
		 * anything else (e.g., data or FIN segments).
		 */
		return;
	}
	if (!tp->packets_out || WARN_ON_ONCE(tcp_rtx_queue_empty(sk)))
		return;

	tp->tlp_high_seq = 0;

	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		/* Receiver dastardly shrinks window. Our retransmits
		 * become zero probes, but we should not timeout this
		 * connection. If the socket is an orphan, time it out,
		 * we cannot allow such beasts to hang infinitely.
		 */
		struct inet_sock *inet = inet_sk(sk);
		if (sk->sk_family == AF_INET) {
			net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
					    &inet->inet_daddr,
					    ntohs(inet->inet_dport),
					    inet->inet_num,
					    tp->snd_una, tp->snd_nxt);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (sk->sk_family == AF_INET6) {
			net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
					    &sk->sk_v6_daddr,
					    ntohs(inet->inet_dport),
					    inet->inet_num,
					    tp->snd_una, tp->snd_nxt);
		}
#endif
		if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk);
		tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
	if (tcp_write_timeout(sk))
		goto out;
	if (icsk->icsk_retransmits == 0) {
		int mib_idx = 0;

		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
			else
				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
			   tp->sacked_out) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKFAILURES;
			else
				mib_idx = LINUX_MIB_TCPRENOFAILURES;
		}
		if (mib_idx)
			__NET_INC_STATS(sock_net(sk), mib_idx);
	}
	tcp_enter_loss(sk);

	icsk->icsk_retransmits++;
	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
		/* Retransmission failed because of local congestion;
		 * let senders fight for local resources conservatively.
		 */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  TCP_RESOURCE_PROBE_INTERVAL,
					  TCP_RTO_MAX);
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
	icsk->icsk_backoff++;

out_reset_timer:
	/* If the stream is thin, use linear timeouts. Since 'icsk_backoff' is
	 * used to reset the timer, set it to 0. Recalculate 'icsk_rto' as this
	 * might be increased if the stream oscillates between thin and thick,
	 * thus the old value might already be too high compared to the value
	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
	 * exponential backoff behaviour, to avoid continuing to hammer
	 * linear-timeout retransmissions into a black hole.
	 */
	if (sk->sk_state == TCP_ESTABLISHED &&
	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
	    tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else {
		/* Use normal (exponential) backoff */
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
	if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
		__sk_dst_reset(sk);

out:;
}

/* Called with bottom-half processing disabled.
 * Called by tcp_write_timer().
 */
void tcp_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !icsk->icsk_pending)
		goto out;

	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		goto out;
	}

	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	switch (event) {
	case ICSK_TIME_REO_TIMEOUT:
		tcp_rack_reo_timeout(sk);
		break;
	case ICSK_TIME_LOSS_PROBE:
		tcp_send_loss_probe(sk);
		break;
	case ICSK_TIME_RETRANS:
		icsk->icsk_pending = 0;
		tcp_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_probe_timer(sk);
		break;
	}

out:
	sk_mem_reclaim(sk);
}
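
/* tcp_write_timer() - timer callback behind the retransmit, probe0,
 * loss probe and RACK reordering timeouts: runs tcp_write_timer_handler(),
 * or defers it to tcp_release_cb() when the socket is owned by user
 * context.
 */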
static void tcp_write_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_retransmit_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_write_timer_handler(sk);
	} else {
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

void tcp_syn_ack_timeout(const struct request_sock *req)
{
	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);

	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
}
EXPORT_SYMBOL(tcp_syn_ack_timeout);

void tcp_set_keepalive(struct sock *sk, int val)
{
	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
	else if (!val)
		inet_csk_delete_keepalive_timer(sk);
}
EXPORT_SYMBOL_GPL(tcp_set_keepalive);
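
/* tcp_keepalive_timer() drives more than keepalive probes: the same
 * sk_timer also implements the FIN_WAIT2 linger timeout and the
 * TCP_USER_TIMEOUT override for idle connections.
 */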
static void tcp_keepalive_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer(sk, HZ/20);
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		pr_err("Hmm... keepalive on a LISTEN ???\n");
		goto out;
	}

	tcp_mstamp_refresh(tp);
	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (tp->linger2 >= 0) {
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	elapsed = keepalive_time_elapsed(tp);

	if (elapsed >= keepalive_time_when(tp)) {
		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((icsk->icsk_user_timeout != 0 &&
		     elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
		     icsk->icsk_probes_out > 0) ||
		    (icsk->icsk_user_timeout == 0 &&
		     icsk->icsk_probes_out >= keepalive_probes(tp))) {
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			icsk->icsk_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	sk_mem_reclaim(sk);

resched:
	inet_csk_reset_keepalive_timer(sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
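
/* Compressed-ACK hrtimer callback: send the ACK that SACK compression
 * has been holding back, or defer to tcp_release_cb() (reusing the
 * delayed-ACK deferred flag) if the socket is owned by user context.
 */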
static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
{
	struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
	struct sock *sk = (struct sock *)tp;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
			tcp_send_ack(sk);
	} else {
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
				      &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return HRTIMER_NORESTART;
}

void tcp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
				  &tcp_keepalive_timer);
	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_PINNED_SOFT);
	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;

	hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_PINNED_SOFT);
	tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
}