Commit dca43c75e7e545694a9dd6288553f55c53e2a3a3
Committed by
David S. Miller
1 parent
409456b10f
Exists in
master
and in
4 other branches
tcp: Add TCP_USER_TIMEOUT socket option.
This patch provides "user timeout" support as described in RFC793. The socket option is also needed for the local half of RFC5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int which, when > 0, specifies the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeout allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeout allows applications to "fail fast" if so desired. Otherwise it may take up to 20 minutes with the current system defaults in a normal WAN environment. The socket option can be set during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will override keepalive to determine when to close a connection due to keepalive failure. The option does not change in any way when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 37 additions and 16 deletions Side-by-side Diff
include/linux/tcp.h
... | ... | @@ -105,6 +105,7 @@ |
105 | 105 | #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ |
106 | 106 | #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ |
107 | 107 | #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ |
108 | +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ | |
108 | 109 | |
109 | 110 | /* for TCP_INFO socket option */ |
110 | 111 | #define TCPI_OPT_TIMESTAMPS 1 |
include/net/inet_connection_sock.h
net/ipv4/tcp.c
... | ... | @@ -2391,7 +2391,12 @@ |
2391 | 2391 | err = tp->af_specific->md5_parse(sk, optval, optlen); |
2392 | 2392 | break; |
2393 | 2393 | #endif |
2394 | - | |
2394 | + case TCP_USER_TIMEOUT: | |
2395 | + /* Cap the max timeout in ms TCP will retry/retrans | |
2396 | + * before giving up and aborting (ETIMEDOUT) a connection. | |
2397 | + */ | |
2398 | + icsk->icsk_user_timeout = msecs_to_jiffies(val); | |
2399 | + break; | |
2395 | 2400 | default: |
2396 | 2401 | err = -ENOPROTOOPT; |
2397 | 2402 | break; |
... | ... | @@ -2609,6 +2614,10 @@ |
2609 | 2614 | break; |
2610 | 2615 | case TCP_THIN_DUPACK: |
2611 | 2616 | val = tp->thin_dupack; |
2617 | + break; | |
2618 | + | |
2619 | + case TCP_USER_TIMEOUT: | |
2620 | + val = jiffies_to_msecs(icsk->icsk_user_timeout); | |
2612 | 2621 | break; |
2613 | 2622 | default: |
2614 | 2623 | return -ENOPROTOOPT; |
net/ipv4/tcp_timer.c
... | ... | @@ -138,10 +138,10 @@ |
138 | 138 | * retransmissions with an initial RTO of TCP_RTO_MIN. |
139 | 139 | */ |
140 | 140 | static bool retransmits_timed_out(struct sock *sk, |
141 | - unsigned int boundary) | |
141 | + unsigned int boundary, | |
142 | + unsigned int timeout) | |
142 | 143 | { |
143 | - unsigned int timeout, linear_backoff_thresh; | |
144 | - unsigned int start_ts; | |
144 | + unsigned int linear_backoff_thresh, start_ts; | |
145 | 145 | |
146 | 146 | if (!inet_csk(sk)->icsk_retransmits) |
147 | 147 | return false; |
148 | 148 | |
... | ... | @@ -151,14 +151,15 @@ |
151 | 151 | else |
152 | 152 | start_ts = tcp_sk(sk)->retrans_stamp; |
153 | 153 | |
154 | - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | |
154 | + if (likely(timeout == 0)) { | |
155 | + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | |
155 | 156 | |
156 | - if (boundary <= linear_backoff_thresh) | |
157 | - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | |
158 | - else | |
159 | - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | |
160 | - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | |
161 | - | |
157 | + if (boundary <= linear_backoff_thresh) | |
158 | + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | |
159 | + else | |
160 | + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | |
161 | + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | |
162 | + } | |
162 | 163 | return (tcp_time_stamp - start_ts) >= timeout; |
163 | 164 | } |
164 | 165 | |
... | ... | @@ -174,7 +175,7 @@ |
174 | 175 | dst_negative_advice(sk); |
175 | 176 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
176 | 177 | } else { |
177 | - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { | |
178 | + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { | |
178 | 179 | /* Black hole detection */ |
179 | 180 | tcp_mtu_probing(icsk, sk); |
180 | 181 | |
181 | 182 | |
... | ... | @@ -187,14 +188,16 @@ |
187 | 188 | |
188 | 189 | retry_until = tcp_orphan_retries(sk, alive); |
189 | 190 | do_reset = alive || |
190 | - !retransmits_timed_out(sk, retry_until); | |
191 | + !retransmits_timed_out(sk, retry_until, 0); | |
191 | 192 | |
192 | 193 | if (tcp_out_of_resources(sk, do_reset)) |
193 | 194 | return 1; |
194 | 195 | } |
195 | 196 | } |
196 | 197 | |
197 | - if (retransmits_timed_out(sk, retry_until)) { | |
198 | + if (retransmits_timed_out(sk, retry_until, | |
199 | + (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 : | |
200 | + icsk->icsk_user_timeout)) { | |
198 | 201 | /* Has it gone just too far? */ |
199 | 202 | tcp_write_err(sk); |
200 | 203 | return 1; |
... | ... | @@ -436,7 +439,7 @@ |
436 | 439 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); |
437 | 440 | } |
438 | 441 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
439 | - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | |
442 | + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) | |
440 | 443 | __sk_dst_reset(sk); |
441 | 444 | |
442 | 445 | out:; |
... | ... | @@ -556,7 +559,14 @@ |
556 | 559 | elapsed = keepalive_time_elapsed(tp); |
557 | 560 | |
558 | 561 | if (elapsed >= keepalive_time_when(tp)) { |
559 | - if (icsk->icsk_probes_out >= keepalive_probes(tp)) { | |
562 | + /* If the TCP_USER_TIMEOUT option is enabled, use that | |
563 | + * to determine when to timeout instead. | |
564 | + */ | |
565 | + if ((icsk->icsk_user_timeout != 0 && | |
566 | + elapsed >= icsk->icsk_user_timeout && | |
567 | + icsk->icsk_probes_out > 0) || | |
568 | + (icsk->icsk_user_timeout == 0 && | |
569 | + icsk->icsk_probes_out >= keepalive_probes(tp))) { | |
560 | 570 | tcp_send_active_reset(sk, GFP_ATOMIC); |
561 | 571 | tcp_write_err(sk); |
562 | 572 | goto out; |