Blame view
net/ipv4/tcp_output.c
111 KB
457c89965 treewide: Add SPD... |
1 |
// SPDX-License-Identifier: GPL-2.0-only |
1da177e4c Linux-2.6.12-rc2 |
2 3 4 5 6 7 8 |
/* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Implementation of the Transmission Control Protocol(TCP). * |
02c30a84e [PATCH] update Ro... |
9 |
* Authors: Ross Biro |
1da177e4c Linux-2.6.12-rc2 |
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Florian La Roche, <flla@stud.uni-sb.de> * Charles Hedrick, <hedrick@klinzhai.rutgers.edu> * Linus Torvalds, <torvalds@cs.helsinki.fi> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Matthew Dillon, <dillon@apollo.west.oic.com> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Jorge Cwik, <jorge@laser.satlink.net> */ /* * Changes: Pedro Roque : Retransmit queue handled by TCP. * : Fragmentation on mtu decrease * : Segment collapse on retransmit * : AF independence * * Linus Torvalds : send_delayed_ack * David S. Miller : Charge memory using the right skb * during syn/ack processing. * David S. Miller : Output engine completely rewritten. * Andrea Arcangeli: SYNACK carry ts_recent in tsecr. * Cacophonix Gaul : draft-minshall-nagle-01 * J Hadi Salim : ECN support * */ |
91df42bed net: ipv4 and ipv... |
37 |
#define pr_fmt(fmt) "TCP: " fmt |
1da177e4c Linux-2.6.12-rc2 |
38 39 40 |
#include <net/tcp.h> #include <linux/compiler.h> |
5a0e3ad6a include cleanup: ... |
41 |
#include <linux/gfp.h> |
1da177e4c Linux-2.6.12-rc2 |
42 |
#include <linux/module.h> |
60e2a7780 tcp: TCP experime... |
43 |
#include <linux/static_key.h> |
1da177e4c Linux-2.6.12-rc2 |
44 |
|
e086101b1 tcp: add a tracep... |
45 |
#include <trace/events/tcp.h> |
35089bb20 [TCP]: Add tcp_sl... |
46 |
|
9799ccb0e tcp: add tcp_wsta... |
47 48 49 50 51 52 |
/* Refresh clocks of a TCP socket,
 * ensuring monotonically increasing values.
 */
void tcp_mstamp_refresh(struct tcp_sock *tp)
{
	u64 now_ns = tcp_clock_ns();

	/* Cache the raw nanosecond clock and its microsecond form. */
	tp->tcp_clock_cache = now_ns;
	tp->tcp_mstamp = div_u64(now_ns, NSEC_PER_USEC);
}
46d3ceabd tcp: TCP Small Qu... |
56 57 |
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); |
519855c50 TCPCT part 1c: sy... |
58 |
|
67edfef78 TCP: Add comments... |
59 |
/* Account for new data that has been sent to the network. */ |
75c119afe tcp: implement rb... |
60 |
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) |
1da177e4c Linux-2.6.12-rc2 |
61 |
{ |
6ba8a3b19 tcp: Tail loss pr... |
62 |
struct inet_connection_sock *icsk = inet_csk(sk); |
9e412ba76 [TCP]: Sed magic ... |
63 |
struct tcp_sock *tp = tcp_sk(sk); |
66f5fe624 [TCP]: Rename upd... |
64 |
unsigned int prior_packets = tp->packets_out; |
9e412ba76 [TCP]: Sed magic ... |
65 |
|
e0d694d63 tcp: annotate tp-... |
66 |
WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq); |
8512430e5 [TCP]: Move FRTO ... |
67 |
|
75c119afe tcp: implement rb... |
68 69 |
__skb_unlink(skb, &sk->sk_write_queue); tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); |
4c5fa9d3c tcp: Fix highest_... |
70 71 |
if (tp->highest_sack == NULL) tp->highest_sack = skb; |
66f5fe624 [TCP]: Rename upd... |
72 |
tp->packets_out += tcp_skb_pcount(skb); |
bec41a11d tcp: remove early... |
73 |
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) |
750ea2baf tcp: early retran... |
74 |
tcp_rearm_rto(sk); |
f19c29e3e tcp: snmp stats f... |
75 |
|
f7324acd9 tcp: Use NET_ADD_... |
76 77 |
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); |
1da177e4c Linux-2.6.12-rc2 |
78 |
} |
a4ecb15a2 tcp: accommodate ... |
79 80 |
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one * window scaling factor due to loss of precision. |
1da177e4c Linux-2.6.12-rc2 |
81 82 83 84 85 |
* If window has been shrunk, what should we make? It is not clear at all. * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */ |
cf533ea53 tcp: add const qu... |
86 |
static inline __u32 tcp_acceptable_seq(const struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
87 |
{ |
cf533ea53 tcp: add const qu... |
88 |
const struct tcp_sock *tp = tcp_sk(sk); |
9e412ba76 [TCP]: Sed magic ... |
89 |
|
a4ecb15a2 tcp: accommodate ... |
90 91 92 |
if (!before(tcp_wnd_end(tp), tp->snd_nxt) || (tp->rx_opt.wscale_ok && ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale)))) |
1da177e4c Linux-2.6.12-rc2 |
93 94 |
return tp->snd_nxt; else |
90840defa [TCP]: Introduce ... |
95 |
return tcp_wnd_end(tp); |
1da177e4c Linux-2.6.12-rc2 |
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
} /* Calculate mss to advertise in SYN segment. * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that: * * 1. It is independent of path mtu. * 2. Ideally, it is maximal possible segment size i.e. 65535-40. * 3. For IPv4 it is reasonable to calculate it from maximal MTU of * attached devices, because some buggy hosts are confused by * large MSS. * 4. We do not make 3, we advertise MSS, calculated from first * hop device mtu, but allow to raise it to ip_rt_min_advmss. * This may be overridden via information stored in routing table. * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible, * probably even Jumbo". */ static __u16 tcp_advertise_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); |
cf533ea53 tcp: add const qu... |
115 |
const struct dst_entry *dst = __sk_dst_get(sk); |
1da177e4c Linux-2.6.12-rc2 |
116 |
int mss = tp->advmss; |
0dbaee3b3 net: Abstract def... |
117 118 119 120 121 122 123 |
if (dst) { unsigned int metric = dst_metric_advmss(dst); if (metric < mss) { mss = metric; tp->advmss = mss; } |
1da177e4c Linux-2.6.12-rc2 |
124 125 126 127 128 129 |
} return (__u16)mss; } /* RFC2861. Reset CWND after idle period longer RTO to "restart window". |
6f021c62d tcp: fix slow sta... |
130 131 132 |
* This is the first part of cwnd validation mechanism. */ void tcp_cwnd_restart(struct sock *sk, s32 delta) |
1da177e4c Linux-2.6.12-rc2 |
133 |
{ |
463c84b97 [NET]: Introduce ... |
134 |
struct tcp_sock *tp = tcp_sk(sk); |
6f021c62d tcp: fix slow sta... |
135 |
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); |
1da177e4c Linux-2.6.12-rc2 |
136 |
u32 cwnd = tp->snd_cwnd; |
6687e988d [ICSK]: Move TCP ... |
137 |
tcp_ca_event(sk, CA_EVENT_CWND_RESTART); |
1da177e4c Linux-2.6.12-rc2 |
138 |
|
6687e988d [ICSK]: Move TCP ... |
139 |
tp->snd_ssthresh = tcp_current_ssthresh(sk); |
1da177e4c Linux-2.6.12-rc2 |
140 |
restart_cwnd = min(restart_cwnd, cwnd); |
463c84b97 [NET]: Introduce ... |
141 |
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) |
1da177e4c Linux-2.6.12-rc2 |
142 143 |
cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); |
c2203cf75 tcp: use tcp_jiff... |
144 |
tp->snd_cwnd_stamp = tcp_jiffies32; |
1da177e4c Linux-2.6.12-rc2 |
145 146 |
tp->snd_cwnd_used = 0; } |
67edfef78 TCP: Add comments... |
147 |
/* Congestion state accounting after a packet has been sent. */ |
40efc6fa1 [TCP]: less inline's |
148 |
static void tcp_event_data_sent(struct tcp_sock *tp, |
cf533ea53 tcp: add const qu... |
149 |
struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
150 |
{ |
463c84b97 [NET]: Introduce ... |
151 |
struct inet_connection_sock *icsk = inet_csk(sk); |
d635fbe27 tcp: use tcp_jiff... |
152 |
const u32 now = tcp_jiffies32; |
1da177e4c Linux-2.6.12-rc2 |
153 |
|
05c5a46d7 tcp: generate CA_... |
154 155 |
if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); |
4a41f453b tcp: change pingp... |
156 157 158 159 |
/* If this is the first data packet sent in response to the * previous received data, * and it is a reply for ato after last received packet, * increase pingpong count. |
1da177e4c Linux-2.6.12-rc2 |
160 |
*/ |
4a41f453b tcp: change pingp... |
161 162 163 164 165 |
if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) inet_csk_inc_pingpong_cnt(sk); tp->lsndtime = now; |
1da177e4c Linux-2.6.12-rc2 |
166 |
} |
67edfef78 TCP: Add comments... |
167 |
/* Account for an ACK we sent. */ |
27cde44a2 tcp: do not cance... |
168 169 |
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts, u32 rcv_nxt) |
1da177e4c Linux-2.6.12-rc2 |
170 |
{ |
5d9f4262b tcp: add SACK com... |
171 |
struct tcp_sock *tp = tcp_sk(sk); |
86de5921a tcp: defer SACK c... |
172 |
if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) { |
200d95f45 tcp: add TCPAckCo... |
173 |
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, |
86de5921a tcp: defer SACK c... |
174 175 |
tp->compressed_ack - TCP_FASTRETRANS_THRESH); tp->compressed_ack = TCP_FASTRETRANS_THRESH; |
5d9f4262b tcp: add SACK com... |
176 177 178 |
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) __sock_put(sk); } |
27cde44a2 tcp: do not cance... |
179 180 181 |
if (unlikely(rcv_nxt != tp->rcv_nxt)) return; /* Special ACK sent by DCTCP to reflect ECN */ |
463c84b97 [NET]: Introduce ... |
182 183 |
tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); |
1da177e4c Linux-2.6.12-rc2 |
184 185 186 187 188 189 190 191 192 |
} /* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. * NOTE: for smooth operation initial space offering should * be a multiple of mss if possible. We assume here that mss >= 1. * This MUST be enforced by all callers. */ |
ceef9ab6b tcp: Namespace-if... |
193 |
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, |
1da177e4c Linux-2.6.12-rc2 |
194 |
__u32 *rcv_wnd, __u32 *window_clamp, |
31d12926e net: Add rtnetlin... |
195 196 |
int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) |
1da177e4c Linux-2.6.12-rc2 |
197 198 199 200 201 |
{ unsigned int space = (__space < 0 ? 0 : __space); /* If no clamp set the clamp to the max possible scaled window */ if (*window_clamp == 0) |
589c49cbf net: tcp: Define ... |
202 |
(*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); |
1da177e4c Linux-2.6.12-rc2 |
203 204 205 206 |
space = min(*window_clamp, space); /* Quantize space offering to a multiple of mss if possible. */ if (space > mss) |
589c49cbf net: tcp: Define ... |
207 |
space = rounddown(space, mss); |
1da177e4c Linux-2.6.12-rc2 |
208 209 |
/* NOTE: offering an initial window larger than 32767 |
15d99e02b [TCP]: sysctl to ... |
210 211 212 213 214 215 |
* will break some buggy TCP stacks. If the admin tells us * it is likely we could be speaking with such a buggy stack * we will truncate our initial window offering to 32K-1 * unless the remote has sent us a window scaling option, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. |
1da177e4c Linux-2.6.12-rc2 |
216 |
*/ |
ceef9ab6b tcp: Namespace-if... |
217 |
if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) |
15d99e02b [TCP]: sysctl to ... |
218 219 |
(*rcv_wnd) = min(space, MAX_TCP_WINDOW); else |
a337531b9 tcp: up initial r... |
220 221 222 223 |
(*rcv_wnd) = min_t(u32, space, U16_MAX); if (init_rcv_wnd) *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); |
15d99e02b [TCP]: sysctl to ... |
224 |
|
19bf62613 tcp: remove loop ... |
225 |
*rcv_wscale = 0; |
1da177e4c Linux-2.6.12-rc2 |
226 |
if (wscale_ok) { |
589c49cbf net: tcp: Define ... |
227 |
/* Set window scaling on max possible window */ |
356d1833b tcp: Namespace-if... |
228 |
space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); |
f626300a3 tcp: consider rec... |
229 |
space = max_t(u32, space, sysctl_rmem_max); |
316c1592b [TCP]: Limit wind... |
230 |
space = min_t(u32, space, *window_clamp); |
19bf62613 tcp: remove loop ... |
231 232 |
*rcv_wscale = clamp_t(int, ilog2(space) - 15, 0, TCP_MAX_WSCALE); |
1da177e4c Linux-2.6.12-rc2 |
233 |
} |
1da177e4c Linux-2.6.12-rc2 |
234 |
/* Set the clamp no higher than max representable value */ |
589c49cbf net: tcp: Define ... |
235 |
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); |
1da177e4c Linux-2.6.12-rc2 |
236 |
} |
4bc2f18ba net/ipv4: EXPORT_... |
237 |
EXPORT_SYMBOL(tcp_select_initial_window); |
1da177e4c Linux-2.6.12-rc2 |
238 239 240 241 242 243 |
/* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return * value can be stuffed directly into th->window for an outgoing * frame. */ |
40efc6fa1 [TCP]: less inline's |
244 |
static u16 tcp_select_window(struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
245 246 |
{ struct tcp_sock *tp = tcp_sk(sk); |
8e165e203 net: tcp: add mib... |
247 |
u32 old_win = tp->rcv_wnd; |
1da177e4c Linux-2.6.12-rc2 |
248 249 250 251 |
u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); /* Never shrink the offered window */ |
2de979bd7 [TCP]: whitespace... |
252 |
if (new_win < cur_win) { |
1da177e4c Linux-2.6.12-rc2 |
253 254 255 256 257 258 259 |
/* Danger Will Robinson! * Don't update rcv_wup/rcv_wnd here or else * we will not be able to advertise a zero * window in time. --DaveM * * Relax Will Robinson. */ |
8e165e203 net: tcp: add mib... |
260 261 262 |
if (new_win == 0) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWANTZEROWINDOWADV); |
607bfbf2d [TCP]: Fix shrink... |
263 |
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); |
1da177e4c Linux-2.6.12-rc2 |
264 265 266 267 268 269 270 |
} tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; /* Make sure we do not exceed the maximum possible * scaled window. */ |
ceef9ab6b tcp: Namespace-if... |
271 272 |
if (!tp->rx_opt.rcv_wscale && sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) |
1da177e4c Linux-2.6.12-rc2 |
273 274 275 276 277 278 |
new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; |
31770e34e tcp: Revert "tcp:... |
279 |
/* If we advertise zero window, disable fast path. */ |
8e165e203 net: tcp: add mib... |
280 |
if (new_win == 0) { |
31770e34e tcp: Revert "tcp:... |
281 |
tp->pred_flags = 0; |
8e165e203 net: tcp: add mib... |
282 283 284 285 286 287 |
if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); } else if (old_win == 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); } |
1da177e4c Linux-2.6.12-rc2 |
288 289 290 |
return new_win; } |
67edfef78 TCP: Add comments... |
291 |
/* Packet ECN state for a SYN-ACK */ |
735d38311 tcp: change TCP_E... |
292 |
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) |
bdf1ee5d3 [TCP]: Move code ... |
293 |
{ |
30e502a34 net: tcp: add fla... |
294 |
const struct tcp_sock *tp = tcp_sk(sk); |
4de075e04 tcp: rename tcp_s... |
295 |
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; |
056834d9f [TCP]: cleanup tc... |
296 |
if (!(tp->ecn_flags & TCP_ECN_OK)) |
4de075e04 tcp: rename tcp_s... |
297 |
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; |
91b5b21c7 bpf: Add support ... |
298 299 |
else if (tcp_ca_needs_ecn(sk) || tcp_bpf_ca_needs_ecn(sk)) |
30e502a34 net: tcp: add fla... |
300 |
INET_ECN_xmit(sk); |
bdf1ee5d3 [TCP]: Move code ... |
301 |
} |
67edfef78 TCP: Add comments... |
302 |
/* Packet ECN state for a SYN. */ |
735d38311 tcp: change TCP_E... |
303 |
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) |
bdf1ee5d3 [TCP]: Move code ... |
304 305 |
{ struct tcp_sock *tp = tcp_sk(sk); |
91b5b21c7 bpf: Add support ... |
306 |
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); |
f7b3bec6f net: allow settin... |
307 |
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || |
91b5b21c7 bpf: Add support ... |
308 |
tcp_ca_needs_ecn(sk) || bpf_needs_ecn; |
f7b3bec6f net: allow settin... |
309 310 311 312 313 314 315 |
if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } |
bdf1ee5d3 [TCP]: Move code ... |
316 317 |
tp->ecn_flags = 0; |
f7b3bec6f net: allow settin... |
318 319 |
if (use_ecn) { |
4de075e04 tcp: rename tcp_s... |
320 |
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; |
bdf1ee5d3 [TCP]: Move code ... |
321 |
tp->ecn_flags = TCP_ECN_OK; |
91b5b21c7 bpf: Add support ... |
322 |
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) |
30e502a34 net: tcp: add fla... |
323 |
INET_ECN_xmit(sk); |
bdf1ee5d3 [TCP]: Move code ... |
324 325 |
} } |
492135557 tcp: add rfc3168,... |
326 327 328 329 330 331 332 333 |
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); } |
735d38311 tcp: change TCP_E... |
334 |
static void |
6ac705b18 tcp: remove tcp_e... |
335 |
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) |
bdf1ee5d3 [TCP]: Move code ... |
336 |
{ |
6ac705b18 tcp: remove tcp_e... |
337 |
if (inet_rsk(req)->ecn_ok) |
bdf1ee5d3 [TCP]: Move code ... |
338 339 |
th->ece = 1; } |
67edfef78 TCP: Add comments... |
340 341 342 |
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to * be sent. */ |
735d38311 tcp: change TCP_E... |
343 |
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, |
ea1627c20 tcp: minor optimi... |
344 |
struct tcphdr *th, int tcp_header_len) |
bdf1ee5d3 [TCP]: Move code ... |
345 346 347 348 349 350 351 352 |
{ struct tcp_sock *tp = tcp_sk(sk); if (tp->ecn_flags & TCP_ECN_OK) { /* Not-retransmitted data segment: set ECT and inject CWR. */ if (skb->len != tcp_header_len && !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { INET_ECN_xmit(sk); |
056834d9f [TCP]: cleanup tc... |
353 |
if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) { |
bdf1ee5d3 [TCP]: Move code ... |
354 |
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; |
ea1627c20 tcp: minor optimi... |
355 |
th->cwr = 1; |
bdf1ee5d3 [TCP]: Move code ... |
356 357 |
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } |
30e502a34 net: tcp: add fla... |
358 |
} else if (!tcp_ca_needs_ecn(sk)) { |
bdf1ee5d3 [TCP]: Move code ... |
359 360 361 362 |
/* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) |
ea1627c20 tcp: minor optimi... |
363 |
th->ece = 1; |
bdf1ee5d3 [TCP]: Move code ... |
364 365 |
} } |
e870a8efc [TCP]: Perform se... |
366 367 368 369 370 |
/* Constructs common control bits of non-data skb. If SYN/FIN is present, * auto increment end seqno. */ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { |
2e8e18ef5 tcp: Set CHECKSUM... |
371 |
skb->ip_summed = CHECKSUM_PARTIAL; |
e870a8efc [TCP]: Perform se... |
372 |
|
4de075e04 tcp: rename tcp_s... |
373 |
TCP_SKB_CB(skb)->tcp_flags = flags; |
e870a8efc [TCP]: Perform se... |
374 |
TCP_SKB_CB(skb)->sacked = 0; |
cd7d8498c tcp: change tcp_s... |
375 |
tcp_skb_pcount_set(skb, 1); |
e870a8efc [TCP]: Perform se... |
376 377 |
TCP_SKB_CB(skb)->seq = seq; |
a3433f35a tcp: unify tcp fl... |
378 |
if (flags & (TCPHDR_SYN | TCPHDR_FIN)) |
e870a8efc [TCP]: Perform se... |
379 380 381 |
seq++; TCP_SKB_CB(skb)->end_seq = seq; } |
a2a385d62 tcp: bool convers... |
382 |
static inline bool tcp_urg_mode(const struct tcp_sock *tp) |
33f5f57ee tcp: kill pointle... |
383 384 385 |
{ return tp->snd_una != tp->snd_up; } |
33ad798c9 tcp: options clea... |
386 387 388 |
#define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) |
89e95a613 IPv4 TCP fails to... |
389 |
#define OPTION_WSCALE (1 << 3) |
2100c8d2d net-tcp: Fast Ope... |
390 |
#define OPTION_FAST_OPEN_COOKIE (1 << 8) |
60e2a7780 tcp: TCP experime... |
391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 |
#define OPTION_SMC (1 << 9) static void smc_options_write(__be32 *ptr, u16 *options) { #if IS_ENABLED(CONFIG_SMC) if (static_branch_unlikely(&tcp_have_smc)) { if (unlikely(OPTION_SMC & *options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_EXP << 8) | (TCPOLEN_EXP_SMC_BASE)); *ptr++ = htonl(TCPOPT_SMC_MAGIC); } } #endif } |
33ad798c9 tcp: options clea... |
407 408 |
struct tcp_out_options { |
2100c8d2d net-tcp: Fast Ope... |
409 410 |
u16 options; /* bit field of OPTION_* */ u16 mss; /* 0 to disable */ |
33ad798c9 tcp: options clea... |
411 412 |
u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ |
bd0388ae7 TCPCT part 1f: In... |
413 |
u8 hash_size; /* bytes in hash_location */ |
bd0388ae7 TCPCT part 1f: In... |
414 |
__u8 *hash_location; /* temporary pointer, overloaded */ |
2100c8d2d net-tcp: Fast Ope... |
415 416 |
__u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ |
33ad798c9 tcp: options clea... |
417 |
}; |
67edfef78 TCP: Add comments... |
418 419 420 |
/* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of |
fd6149d33 tcp: Restore orde... |
421 422 |
* TCP options, we learned this through the hard way, so be careful here. * Luckily we can at least blame others for their non-compliance but from |
8e3bff96a net: more spellin... |
423 |
* inter-operability perspective it seems that we're somewhat stuck with |
fd6149d33 tcp: Restore orde... |
424 425 426 427 428 429 430 |
* the ordering which we have been using if we want to keep working with * those broken things (not that it currently hurts anybody as there isn't * particular reason why the ordering would need to be changed). * * At least SACK_PERM as the first option is known to lead to a disaster * (but it may well be that other scenarios fail similarly). */ |
33ad798c9 tcp: options clea... |
431 |
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, |
bd0388ae7 TCPCT part 1f: In... |
432 433 |
struct tcp_out_options *opts) { |
2100c8d2d net-tcp: Fast Ope... |
434 |
u16 options = opts->options; /* mungable copy */ |
bd0388ae7 TCPCT part 1f: In... |
435 |
|
bd0388ae7 TCPCT part 1f: In... |
436 |
if (unlikely(OPTION_MD5 & options)) { |
1a2c6181c tcp: Remove TCPCT |
437 438 |
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); |
bd0388ae7 TCPCT part 1f: In... |
439 440 |
/* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; |
33ad798c9 tcp: options clea... |
441 |
ptr += 4; |
40efc6fa1 [TCP]: less inline's |
442 |
} |
33ad798c9 tcp: options clea... |
443 |
|
fd6149d33 tcp: Restore orde... |
444 445 446 447 448 |
if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | opts->mss); } |
bd0388ae7 TCPCT part 1f: In... |
449 450 |
if (likely(OPTION_TS & options)) { if (unlikely(OPTION_SACK_ADVERTISE & options)) { |
33ad798c9 tcp: options clea... |
451 452 453 454 |
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); |
bd0388ae7 TCPCT part 1f: In... |
455 |
options &= ~OPTION_SACK_ADVERTISE; |
33ad798c9 tcp: options clea... |
456 457 458 459 460 461 462 463 464 |
} else { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); } *ptr++ = htonl(opts->tsval); *ptr++ = htonl(opts->tsecr); } |
bd0388ae7 TCPCT part 1f: In... |
465 |
if (unlikely(OPTION_SACK_ADVERTISE & options)) { |
33ad798c9 tcp: options clea... |
466 467 468 469 470 |
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); } |
bd0388ae7 TCPCT part 1f: In... |
471 |
if (unlikely(OPTION_WSCALE & options)) { |
33ad798c9 tcp: options clea... |
472 473 474 475 476 477 478 479 480 |
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | opts->ws); } if (unlikely(opts->num_sack_blocks)) { struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; |
40efc6fa1 [TCP]: less inline's |
481 482 483 484 485 |
int this_sack; *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK << 8) | |
33ad798c9 tcp: options clea... |
486 |
(TCPOLEN_SACK_BASE + (opts->num_sack_blocks * |
40efc6fa1 [TCP]: less inline's |
487 |
TCPOLEN_SACK_PERBLOCK))); |
2de979bd7 [TCP]: whitespace... |
488 |
|
33ad798c9 tcp: options clea... |
489 490 |
for (this_sack = 0; this_sack < opts->num_sack_blocks; ++this_sack) { |
40efc6fa1 [TCP]: less inline's |
491 492 493 |
*ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); } |
2de979bd7 [TCP]: whitespace... |
494 |
|
5861f8e58 tcp: remove point... |
495 |
tp->rx_opt.dsack = 0; |
40efc6fa1 [TCP]: less inline's |
496 |
} |
2100c8d2d net-tcp: Fast Ope... |
497 498 499 |
if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; |
7f9b838b7 tcp: RFC7413 opti... |
500 501 502 503 504 505 506 507 508 509 510 511 512 |
u8 *p = (u8 *)ptr; u32 len; /* Fast Open option length */ if (foc->exp) { len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | TCPOPT_FASTOPEN_MAGIC); p += TCPOLEN_EXP_FASTOPEN_BASE; } else { len = TCPOLEN_FASTOPEN_BASE + foc->len; *p++ = TCPOPT_FASTOPEN; *p++ = len; } |
2100c8d2d net-tcp: Fast Ope... |
513 |
|
7f9b838b7 tcp: RFC7413 opti... |
514 515 516 517 |
memcpy(p, foc->val, foc->len); if ((len & 3) == 2) { p[foc->len] = TCPOPT_NOP; p[foc->len + 1] = TCPOPT_NOP; |
2100c8d2d net-tcp: Fast Ope... |
518 |
} |
7f9b838b7 tcp: RFC7413 opti... |
519 |
ptr += (len + 3) >> 2; |
2100c8d2d net-tcp: Fast Ope... |
520 |
} |
60e2a7780 tcp: TCP experime... |
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 |
smc_options_write(ptr, &options); } static void smc_set_option(const struct tcp_sock *tp, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) if (static_branch_unlikely(&tcp_have_smc)) { if (tp->syn_smc) { if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { opts->options |= OPTION_SMC; *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } } #endif } static void smc_set_option_cond(const struct tcp_sock *tp, const struct inet_request_sock *ireq, struct tcp_out_options *opts, unsigned int *remaining) { #if IS_ENABLED(CONFIG_SMC) if (static_branch_unlikely(&tcp_have_smc)) { if (tp->syn_smc && ireq->smc_ok) { if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { opts->options |= OPTION_SMC; *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; } } } #endif |
33ad798c9 tcp: options clea... |
556 |
} |
67edfef78 TCP: Add comments... |
557 558 559 |
/* Compute TCP options for SYN packets. This is not the final * network wire format yet. */ |
95c961747 net: cleanup unsi... |
560 |
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
33ad798c9 tcp: options clea... |
561 |
struct tcp_out_options *opts, |
cf533ea53 tcp: add const qu... |
562 563 |
struct tcp_md5sig_key **md5) { |
33ad798c9 tcp: options clea... |
564 |
struct tcp_sock *tp = tcp_sk(sk); |
95c961747 net: cleanup unsi... |
565 |
unsigned int remaining = MAX_TCP_OPTION_SPACE; |
783237e8d net-tcp: Fast Ope... |
566 |
struct tcp_fastopen_request *fastopen = tp->fastopen_req; |
33ad798c9 tcp: options clea... |
567 |
|
8c2320e84 tcp: md5: only ca... |
568 |
*md5 = NULL; |
cfb6eeb4c [TCP]: MD5 Signat... |
569 |
#ifdef CONFIG_TCP_MD5SIG |
921f9a0f2 tcp: convert tcp_... |
570 |
if (static_branch_unlikely(&tcp_md5_needed) && |
6015c71e6 tcp: md5: add tcp... |
571 |
rcu_access_pointer(tp->md5sig_info)) { |
8c2320e84 tcp: md5: only ca... |
572 573 574 575 576 |
*md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; } |
cfb6eeb4c [TCP]: MD5 Signat... |
577 578 |
} #endif |
33ad798c9 tcp: options clea... |
579 580 581 582 583 584 585 586 587 588 589 |
/* We always get an MSS option. The option bytes which will be seen in * normal data packets should timestamps be used, must be in the MSS * advertised. But we subtract them from tp->mss_cache so that * calculations in tcp_sendmsg are simpler etc. So account for this * fact here if necessary. If we don't do this correctly, as a * receiver we won't recognize data packets as being full sized when we * should, and thus we won't abide by the delayed ACK rules correctly. * SACKs don't matter, we never delay an ACK when we have any of those * going out. */ opts->mss = tcp_advertise_mss(sk); |
bd0388ae7 TCPCT part 1f: In... |
590 |
remaining -= TCPOLEN_MSS_ALIGNED; |
33ad798c9 tcp: options clea... |
591 |
|
5d2ed0521 tcp: Namespaceify... |
592 |
if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) { |
33ad798c9 tcp: options clea... |
593 |
opts->options |= OPTION_TS; |
7faee5c0d tcp: remove TCP_S... |
594 |
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; |
33ad798c9 tcp: options clea... |
595 |
opts->tsecr = tp->rx_opt.ts_recent; |
bd0388ae7 TCPCT part 1f: In... |
596 |
remaining -= TCPOLEN_TSTAMP_ALIGNED; |
33ad798c9 tcp: options clea... |
597 |
} |
9bb37ef00 tcp: Namespaceify... |
598 |
if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) { |
33ad798c9 tcp: options clea... |
599 |
opts->ws = tp->rx_opt.rcv_wscale; |
89e95a613 IPv4 TCP fails to... |
600 |
opts->options |= OPTION_WSCALE; |
bd0388ae7 TCPCT part 1f: In... |
601 |
remaining -= TCPOLEN_WSCALE_ALIGNED; |
33ad798c9 tcp: options clea... |
602 |
} |
f93010342 tcp: Namespaceify... |
603 |
if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) { |
33ad798c9 tcp: options clea... |
604 |
opts->options |= OPTION_SACK_ADVERTISE; |
b32d13102 tcp: Fix bitmask ... |
605 |
if (unlikely(!(OPTION_TS & opts->options))) |
bd0388ae7 TCPCT part 1f: In... |
606 |
remaining -= TCPOLEN_SACKPERM_ALIGNED; |
33ad798c9 tcp: options clea... |
607 |
} |
783237e8d net-tcp: Fast Ope... |
608 |
if (fastopen && fastopen->cookie.len >= 0) { |
2646c831c tcp: RFC7413 opti... |
609 610 611 612 |
u32 need = fastopen->cookie.len; need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE : TCPOLEN_FASTOPEN_BASE; |
783237e8d net-tcp: Fast Ope... |
613 614 615 616 617 618 |
need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; opts->fastopen_cookie = &fastopen->cookie; remaining -= need; tp->syn_fastopen = 1; |
2646c831c tcp: RFC7413 opti... |
619 |
tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0; |
783237e8d net-tcp: Fast Ope... |
620 621 |
} } |
bd0388ae7 TCPCT part 1f: In... |
622 |
|
60e2a7780 tcp: TCP experime... |
623 |
smc_set_option(tp, opts, &remaining); |
bd0388ae7 TCPCT part 1f: In... |
624 |
return MAX_TCP_OPTION_SPACE - remaining; |
40efc6fa1 [TCP]: less inline's |
625 |
} |
67edfef78 TCP: Add comments... |
626 |
/* Set up TCP options for SYN-ACKs.
 *
 * Fills in @opts and returns the number of option bytes that will be
 * written into the header (always a multiple of 4, bounded by
 * MAX_TCP_OPTION_SPACE).  Option selection is driven by what the peer
 * advertised in its SYN, as recorded in the request sock (@ireq flags),
 * plus the optional MD5 key and Fast Open cookie supplied by the caller.
 */
static unsigned int tcp_synack_options(const struct sock *sk,
				       struct request_sock *req,
				       unsigned int mss, struct sk_buff *skb,
				       struct tcp_out_options *opts,
				       const struct tcp_md5sig_key *md5,
				       struct tcp_fastopen_cookie *foc)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;

#ifdef CONFIG_TCP_MD5SIG
	if (md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;

		/* We can't fit any SACK blocks in a packet with MD5 + TS
		 * options. There was discussion about disabling SACK
		 * rather than TS in order to fit in better with old,
		 * buggy kernels, but that was deemed to be unnecessary.
		 */
		ireq->tstamp_ok &= !ireq->sack_ok;
	}
#endif

	/* We always send an MSS option. */
	opts->mss = mss;
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(ireq->wscale_ok)) {
		opts->ws = ireq->rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(ireq->tstamp_ok)) {
		opts->options |= OPTION_TS;
		/* ts_off randomizes the offered TSval per connection */
		opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
		opts->tsecr = req->ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(ireq->sack_ok)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		/* SACK-permitted rides in the timestamp option's padding
		 * when timestamps are on; it only costs space without them.
		 */
		if (unlikely(!ireq->tstamp_ok))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}
	if (foc != NULL && foc->len >= 0) {
		u32 need = foc->len;

		need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
				   TCPOLEN_FASTOPEN_BASE;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		/* Cookie is dropped entirely if it no longer fits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = foc;
			remaining -= need;
		}
	}

	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);

	return MAX_TCP_OPTION_SPACE - remaining;
}
67edfef78 TCP: Add comments... |
687 688 689 |
/* Compute TCP options for ESTABLISHED sockets. This is not the
 * final wire format yet.
 *
 * Returns the total option size in bytes and fills @opts; *@md5 is set
 * to the connection's MD5 key when one is configured, else NULL.
 * @skb may be NULL (tsval then defaults to 0).
 */
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
					    struct tcp_out_options *opts,
					    struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int size = 0;
	unsigned int eff_sacks;

	opts->options = 0;

	*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
	/* static key avoids the md5_lookup() cost when no socket in the
	 * system uses TCP-MD5 at all
	 */
	if (static_branch_unlikely(&tcp_md5_needed) &&
	    rcu_access_pointer(tp->md5sig_info)) {
		*md5 = tp->af_specific->md5_lookup(sk, sk);
		if (*md5) {
			opts->options |= OPTION_MD5;
			size += TCPOLEN_MD5SIG_ALIGNED;
		}
	}
#endif

	if (likely(tp->rx_opt.tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* SACK blocks consume whatever space is left, capped by how many
	 * fit in the remaining option area.
	 */
	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
	if (unlikely(eff_sacks)) {
		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;

		opts->num_sack_blocks =
			min_t(unsigned int, eff_sacks,
			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
			      TCPOLEN_SACK_PERBLOCK);
		if (likely(opts->num_sack_blocks))
			size += TCPOLEN_SACK_BASE_ALIGNED +
				opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
	}

	return size;
}
1da177e4c Linux-2.6.12-rc2 |
731 |
|
46d3ceabd tcp: TCP Small Qu... |
732 733 734 735 736 737 738 739 740 |
/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev)
 * to reduce RTT and bufferbloat.
 * We do this using a special skb destructor (tcp_wfree).
 *
 * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
 * needs to be reallocated in a driver.
 * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
 *
 * Since transmit from skb destructor is forbidden, we use a tasklet
 * to process all sockets that eventually need to send more skbs.
 * We use one tasklet per cpu, with its own queue of sockets.
 */
struct tsq_tasklet {
	struct tasklet_struct	tasklet;
	struct list_head	head; /* queue of tcp sockets */
};
/* one instance per possible cpu; see tcp_tasklet_init() */
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
73a6bab5a tcp: switch pacin... |
752 |
/* Push more packets for @sk, called once TSQ throttling is lifted.
 * Only acts when the connection is in a state that may transmit data;
 * retransmits first if losses are outstanding and cwnd allows it,
 * then (re)starts normal transmission via tcp_write_xmit().
 */
static void tcp_tsq_write(struct sock *sk)
{
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->lost_out > tp->retrans_out &&
		    tp->snd_cwnd > tcp_packets_in_flight(tp)) {
			/* refresh clock before retransmitting */
			tcp_mstamp_refresh(tp);
			tcp_xmit_retransmit_queue(sk);
		}

		/* GFP_ATOMIC: we may run from tasklet/timer context */
		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
			       0, GFP_ATOMIC);
	}
}
73a6bab5a tcp: switch pacin... |
769 770 771 772 773 774 775 776 777 778 |
/* Run tcp_tsq_write() right away if the socket is not owned by user
 * context; otherwise defer the work to tcp_release_cb() by setting
 * TCP_TSQ_DEFERRED (taking a socket reference that release_cb drops).
 */
static void tcp_tsq_handler(struct sock *sk)
{
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		tcp_tsq_write(sk);
	else if (!test_and_set_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
		sock_hold(sk);
	bh_unlock_sock(sk);
}
46d3ceabd tcp: TCP Small Qu... |
779 |
/*
 * One tasklet per cpu tries to send more skbs.
 * We run in tasklet context but need to disable irqs when
 * transferring tsq->head because tcp_wfree() might
 * interrupt us (non NAPI drivers)
 */
static void tcp_tasklet_func(unsigned long data)
{
	struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
	LIST_HEAD(list);
	unsigned long flags;
	struct list_head *q, *n;
	struct tcp_sock *tp;
	struct sock *sk;

	/* atomically steal the whole pending-socket list */
	local_irq_save(flags);
	list_splice_init(&tsq->head, &list);
	local_irq_restore(flags);

	list_for_each_safe(q, n, &list) {
		tp = list_entry(q, struct tcp_sock, tsq_node);
		list_del(&tp->tsq_node);

		sk = (struct sock *)tp;
		/* barrier pairs with the cmpxchg in tcp_wfree() before
		 * clearing TSQ_QUEUED, so the socket can be re-queued
		 */
		smp_mb__before_atomic();
		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);

		tcp_tsq_handler(sk);
		/* drop the reference kept by tcp_wfree() */
		sk_free(sk);
	}
}
40fc3423b tcp: tsq: add tsq... |
809 810 811 812 |
/* All deferred-work flags handled by tcp_release_cb() */
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
			  TCPF_WRITE_TIMER_DEFERRED |	\
			  TCPF_DELACK_TIMER_DEFERRED |	\
			  TCPF_MTU_REDUCED_DEFERRED)
/**
 * tcp_release_cb - tcp release_sock() callback
 * @sk: socket
 *
 * called from release_sock() to perform protocol dependent
 * actions before socket release.
 */
void tcp_release_cb(struct sock *sk)
{
	unsigned long flags, nflags;

	/* perform an atomic operation only if at least one flag is set */
	do {
		flags = sk->sk_tsq_flags;
		if (!(flags & TCP_DEFERRED_ALL))
			return;
		nflags = flags & ~TCP_DEFERRED_ALL;
	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);

	if (flags & TCPF_TSQ_DEFERRED) {
		tcp_tsq_write(sk);
		__sock_put(sk);
	}
	/* Here begins the tricky part :
	 * We are called from release_sock() with :
	 * 1) BH disabled
	 * 2) sk_lock.slock spinlock held
	 * 3) socket owned by us (sk->sk_lock.owned == 1)
	 *
	 * But following code is meant to be called from BH handlers,
	 * so we should keep BH disabled, but early release socket ownership
	 */
	sock_release_ownership(sk);

	/* each deferred handler drops the reference taken when its
	 * corresponding flag was set
	 */
	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
		tcp_write_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
		tcp_delack_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
		__sock_put(sk);
	}
}
EXPORT_SYMBOL(tcp_release_cb);

/* Boot-time setup of the per-cpu TSQ tasklets. */
void __init tcp_tasklet_init(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);

		INIT_LIST_HEAD(&tsq->head);
		tasklet_init(&tsq->tasklet,
			     tcp_tasklet_func,
			     (unsigned long)tsq);
	}
}

/*
 * Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold qdisc lock.
 */
d6a4a1041 tcp: GSO should b... |
880 |
void tcp_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long flags, nval, oval;

	/* Keep one reference on sk_wmem_alloc.
	 * Will be released by sk_free() from here or tcp_tasklet_func()
	 */
	WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));

	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
	 * Wait until our queues (qdisc + devices) are drained.
	 * This gives :
	 * - less callbacks to tcp_write_xmit(), reducing stress (batches)
	 * - chance for incoming ACK (processed by another cpu maybe)
	 *   to migrate this flow (skb->ooo_okay will be eventually set)
	 */
	if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
		goto out;

	/* lock-free transition THROTTLED -> QUEUED; retry on concurrent
	 * flag updates (cmpxchg loop)
	 */
	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
		struct tsq_tasklet *tsq;
		bool empty;

		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
			goto out;

		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
		if (nval != oval)
			continue;

		/* queue this socket to tasklet queue */
		local_irq_save(flags);
		tsq = this_cpu_ptr(&tsq_tasklet);
		empty = list_empty(&tsq->head);
		list_add(&tp->tsq_node, &tsq->head);
		if (empty)
			tasklet_schedule(&tsq->tasklet);
		local_irq_restore(flags);
		/* reference kept; tcp_tasklet_func() will sk_free() */
		return;
	}
out:
	sk_free(sk);
}
73a6bab5a tcp: switch pacin... |
924 925 |
/* Note: Called under soft irq.
 * We can call TCP stack right away, unless socket is owned by user.
 *
 * Pacing-timer expiry callback: kick transmission (or defer it via
 * tcp_tsq_handler) and drop the timer's socket reference.
 */
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
{
	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
	struct sock *sk = (struct sock *)tp;

	tcp_tsq_handler(sk);
	sock_put(sk);

	return HRTIMER_NORESTART;
}
a7a256306 tcp: mitigate sch... |
937 938 |
/* Post-transmit bookkeeping for @skb: advance the socket's internal
 * pacing clock (tp->tcp_wstamp_ns) by this skb's serialization time when
 * kernel pacing is active, and move the skb to the tail of the tsorted
 * list so it stays ordered by last-sent time.
 * @prior_wstamp: tp->tcp_wstamp_ns value captured before the transmit.
 */
static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
				      u64 prior_wstamp)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sk->sk_pacing_status != SK_PACING_NONE) {
		unsigned long rate = sk->sk_pacing_rate;

		/* Original sch_fq does not pace first 10 MSS
		 * Note that tp->data_segs_out overflows after 2^32 packets,
		 * this is a minor annoyance.
		 */
		if (rate != ~0UL && rate && tp->data_segs_out >= 10) {
			u64 len_ns = div64_ul((u64)skb->len * NSEC_PER_SEC, rate);
			u64 credit = tp->tcp_wstamp_ns - prior_wstamp;

			/* take into account OS jitter */
			len_ns -= min_t(u64, len_ns / 2, credit);
			tp->tcp_wstamp_ns += len_ns;
		}
	}
	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
1da177e4c Linux-2.6.12-rc2 |
959 960 961 962 963 964 965 966 967 968 969 |
/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 *
 * @clone_it: non-zero when the skb must stay on the queue, so a clone
 *            (or copy) is transmitted instead of the original.
 * @rcv_nxt:  value to put in the header's ack_seq field.
 * Returns 0 or a negative errno-style code from the IP layer.
 */
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct sk_buff *oskb = NULL;	/* original skb when cloning */
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	u64 prior_wstamp;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));
	tp = tcp_sk(sk);
	/* monotonic pacing clock; also becomes the skb departure time */
	prior_wstamp = tp->tcp_wstamp_ns;
	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
	if (clone_it) {
		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
			- tp->snd_una;
		oskb = skb;

		/* tsorted anchor must be saved/restored around clone/copy */
		tcp_skb_tsorted_save(oskb) {
			if (unlikely(skb_cloned(oskb)))
				skb = pskb_copy(oskb, gfp_mask);
			else
				skb = skb_clone(oskb, gfp_mask);
		} tcp_skb_tsorted_restore(oskb);

		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	} else {
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
		/* Force a PSH flag on all (GSO) packets to expedite GRO flush
		 * at receiver : This slightly improve GRO performance.
		 * Note that we do not force the PSH flag for non GSO packets,
		 * because they might be sent under high congestion events,
		 * and in this case it is better to delay the delivery of 1-MSS
		 * packets and thus the corresponding ACK packet that would
		 * release the following packet.
		 */
		if (tcp_skb_pcount(skb) > 1)
			tcb->tcp_flags |= TCPHDR_PSH;
	}
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* if no packet is in qdisc/device queue, then allow XPS to select
	 * another queue. We can be called from tcp_tsq_handler()
	 * which holds one reference to sk.
	 *
	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
	 * One way to get this would be to set skb->truesize = 2 on them.
	 */
	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);

	/* If we had to use memory reserve to allocate this skb,
	 * this might cause drops if packet is looped back :
	 * Other socket might not have SOCK_MEMALLOC.
	 * Packets not looped back do not care about pfmemalloc.
	 */
	skb->pfmemalloc = 0;

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	/* attach our TSQ-aware destructor (plain __sock_wfree for pure
	 * ACKs, which must not trigger TSQ accounting)
	 */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
	skb_set_hash_from_sk(skb, sk);
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);

	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);

	/* Build TCP header and checksum it. */
	th = (struct tcphdr *)skb->data;
	th->source		= inet->inet_sport;
	th->dest		= inet->inet_dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(rcv_nxt);
	/* write data-offset + flags as one 16bit store */
	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->tcp_flags);

	th->check		= 0;
	th->urg_ptr		= 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			/* urgent pointer can't express the offset; saturate */
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
		th->window      = htons(tcp_select_window(sk));
		tcp_ecn_send(sk, skb, th, tcp_header_size);
	} else {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	}
#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, skb);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);

	if (skb->len != tcp_header_size) {
		/* data-bearing segment: update data counters */
		tcp_event_data_sent(tp, sk);
		tp->data_segs_out += tcp_skb_pcount(skb);
		tp->bytes_sent += skb->len - tcp_header_size;
	}

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	tp->segs_out += tcp_skb_pcount(skb);
	/* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

	/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */

	/* Cleanup our debris for IP stacks */
	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
			       sizeof(struct inet6_skb_parm)));

	tcp_add_tx_delay(skb, tp);

	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);

	if (unlikely(err > 0)) {
		tcp_enter_cwr(sk);
		err = net_xmit_eval(err);
	}
	if (!err && oskb) {
		/* bookkeeping on the skb that stays queued, not the clone */
		tcp_update_skb_after_send(sk, oskb, prior_wstamp);
		tcp_rate_skb_sent(sk, oskb);
	}
	return err;
}
2987babb6 tcp: helpers to s... |
1132 1133 1134 1135 1136 1137 |
/* Convenience wrapper around __tcp_transmit_skb(): acknowledge the
 * socket's current rcv_nxt in the outgoing header.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	u32 rcv_nxt = tcp_sk(sk)->rcv_nxt;

	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask, rcv_nxt);
}
67edfef78 TCP: Add comments... |
1138 |
/* This routine just queues the buffer for sending.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise socket can stall.
 */
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Advance write_seq and place onto the write_queue. */
	WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
	__skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	/* charge the skb's truesize against the socket's send buffer */
	sk_wmem_queued_add(sk, skb->truesize);
	sk_mem_charge(sk, skb->truesize);
}
67edfef78 TCP: Add comments... |
1154 |
/* Initialize TSO segments for a packet.
 * Sets the skb's pcount (number of MSS-sized segments it represents)
 * and the gso_size used later for skb_shinfo(); gso_size stays 0 for
 * single-segment skbs.
 */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
	if (skb->len <= mss_now) {
		/* Avoid the costly divide in the normal
		 * non-TSO case.
		 */
		tcp_skb_pcount_set(skb, 1);
		TCP_SKB_CB(skb)->tcp_gso_size = 0;
	} else {
		tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
		TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
	}
}
797108d13 tcp: add helper f... |
1168 1169 1170 |
/* Pcount in the middle of the write queue got changed, we need to do various
 * tweaks to fix counters
 *
 * @decr: number of segments removed from @skb's pcount; every counter that
 * classified those segments (sacked/retrans/lost) must shrink by the same
 * amount to keep the in-flight accounting consistent.
 */
static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->packets_out -= decr;

	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
		tp->sacked_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
		tp->retrans_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
		tp->lost_out -= decr;

	/* Reno case is special. Sigh... */
	if (tcp_is_reno(tp) && decr > 0)
		tp->sacked_out -= min_t(u32, tp->sacked_out, decr);

	if (tp->lost_skb_hint &&
	    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
		tp->lost_cnt_hint -= decr;

	tcp_verify_left_out(tp);
}
0a2cf20c3 tcp: remove SKBTX... |
1194 1195 1196 1197 1198 |
static bool tcp_has_tx_tstamp(const struct sk_buff *skb) { return TCP_SKB_CB(skb)->txstamp_ack || (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); } |
490cc7d03 net-timestamp: fi... |
1199 1200 1201 |
/* When @skb is split and @skb2 now covers the sequence range containing
 * the timestamp key (tskey), migrate the tx-timestamp request flags and
 * the txstamp_ack bit from @skb to @skb2 so the timestamp fires on the
 * right fragment.
 */
static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	if (unlikely(tcp_has_tx_tstamp(skb)) &&
	    !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
		struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
		u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;

		shinfo->tx_flags &= ~tsflags;
		shinfo2->tx_flags |= tsflags;
		swap(shinfo->tskey, shinfo2->tskey);
		TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack;
		TCP_SKB_CB(skb)->txstamp_ack = 0;
	}
}
a166140e8 tcp: Handle eor b... |
1214 1215 1216 1217 1218 |
static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) { TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor; TCP_SKB_CB(skb)->eor = 0; } |
75c119afe tcp: implement rb... |
1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 |
/* Insert buff after skb on the write or rtx queue of sk. */
static void tcp_insert_write_queue_after(struct sk_buff *skb,
					 struct sk_buff *buff,
					 struct sock *sk,
					 enum tcp_queue tcp_queue)
{
	/* The rtx queue is an rbtree keyed by sequence, so "after skb"
	 * is implicit there; the write queue is a plain list.
	 */
	if (tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)
		tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
	else
		__skb_queue_after(&sk->sk_write_queue, skb, buff);
}
1da177e4c Linux-2.6.12-rc2 |
1230 1231 |
/* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the |
e905a9eda [NET] IPV4: Fix w... |
1232 |
* packet to the list. This won't be called frequently, I hope. |
1da177e4c Linux-2.6.12-rc2 |
1233 1234 |
* Remember, these are still headerless SKBs at this point. */ |
75c119afe tcp: implement rb... |
1235 1236 |
int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, struct sk_buff *skb, u32 len, |
6cc55e096 tcp: add gfp para... |
1237 |
unsigned int mss_now, gfp_t gfp) |
1da177e4c Linux-2.6.12-rc2 |
1238 1239 1240 |
{ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; |
6475be16f [TCP]: Keep TSO e... |
1241 |
int nsize, old_factor; |
b617158dc tcp: be more care... |
1242 |
long limit; |
b60b49ea6 [TCP]: Account sk... |
1243 |
int nlen; |
9ce014610 tcp: get rid of t... |
1244 |
u8 flags; |
1da177e4c Linux-2.6.12-rc2 |
1245 |
|
2fceec133 tcp: len check is... |
1246 1247 |
if (WARN_ON(len > skb->len)) return -EINVAL; |
6a438bbe6 [TCP]: speed up S... |
1248 |
|
1da177e4c Linux-2.6.12-rc2 |
1249 1250 1251 |
nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; |
b617158dc tcp: be more care... |
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 |
/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb. * We need some allowance to not penalize applications setting small * SO_SNDBUF values. * Also allow first and last skb in retransmit queue to be split. */ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE); if (unlikely((sk->sk_wmem_queued >> 1) > limit && tcp_queue != TCP_FRAG_IN_WRITE_QUEUE && skb != tcp_rtx_queue_head(sk) && skb != tcp_rtx_queue_tail(sk))) { |
f070ef2ac tcp: tcp_fragment... |
1262 1263 1264 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG); return -ENOMEM; } |
6cc55e096 tcp: add gfp para... |
1265 |
if (skb_unclone(skb, gfp)) |
1da177e4c Linux-2.6.12-rc2 |
1266 1267 1268 |
return -ENOMEM; /* Get a new skb... force flag on. */ |
eb9344781 tcp: add a force_... |
1269 |
buff = sk_stream_alloc_skb(sk, nsize, gfp, true); |
51456b291 ipv4: coding styl... |
1270 |
if (!buff) |
1da177e4c Linux-2.6.12-rc2 |
1271 |
return -ENOMEM; /* We'll just try again later. */ |
414776621 net/tls: prevent ... |
1272 |
skb_copy_decrypted(buff, skb); |
ef5cb9738 [TCP]: Fix truesi... |
1273 |
|
ab4e846a8 tcp: annotate sk-... |
1274 |
sk_wmem_queued_add(sk, buff->truesize); |
3ab224be6 [NET] CORE: Intro... |
1275 |
sk_mem_charge(sk, buff->truesize); |
b60b49ea6 [TCP]: Account sk... |
1276 1277 1278 |
nlen = skb->len - len - nsize; buff->truesize += nlen; skb->truesize -= nlen; |
1da177e4c Linux-2.6.12-rc2 |
1279 1280 1281 1282 1283 1284 1285 |
/* Correct the sequence numbers. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ |
4de075e04 tcp: rename tcp_s... |
1286 1287 1288 |
flags = TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; |
e14c3caf6 [TCP]: Handle SAC... |
1289 |
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
a166140e8 tcp: Handle eor b... |
1290 |
tcp_skb_fragment_eor(skb, buff); |
1da177e4c Linux-2.6.12-rc2 |
1291 |
|
98be9b120 tcp: remove dead ... |
1292 |
skb_split(skb, buff, len); |
1da177e4c Linux-2.6.12-rc2 |
1293 |
|
98be9b120 tcp: remove dead ... |
1294 |
buff->ip_summed = CHECKSUM_PARTIAL; |
1da177e4c Linux-2.6.12-rc2 |
1295 |
|
a61bbcf28 [NET]: Store skb-... |
1296 |
buff->tstamp = skb->tstamp; |
490cc7d03 net-timestamp: fi... |
1297 |
tcp_fragment_tstamp(skb, buff); |
1da177e4c Linux-2.6.12-rc2 |
1298 |
|
6475be16f [TCP]: Keep TSO e... |
1299 |
old_factor = tcp_skb_pcount(skb); |
1da177e4c Linux-2.6.12-rc2 |
1300 |
/* Fix up tso_factor for both original and new SKB. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1301 1302 |
tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); |
1da177e4c Linux-2.6.12-rc2 |
1303 |
|
b9f64820f tcp: track data d... |
1304 1305 |
/* Update delivered info for the new segment */ TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; |
6475be16f [TCP]: Keep TSO e... |
1306 1307 1308 |
/* If this packet has been sent out already, we must * adjust the various packet counters. */ |
cf0b450cd [TCP]: Fix off by... |
1309 |
if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { |
6475be16f [TCP]: Keep TSO e... |
1310 1311 |
int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); |
1da177e4c Linux-2.6.12-rc2 |
1312 |
|
797108d13 tcp: add helper f... |
1313 1314 |
if (diff) tcp_adjust_pcount(sk, skb, diff); |
1da177e4c Linux-2.6.12-rc2 |
1315 1316 1317 |
} /* Link BUFF into the send queue. */ |
f4a775d14 net: introduce __... |
1318 |
__skb_header_release(buff); |
75c119afe tcp: implement rb... |
1319 |
tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); |
f67971e68 tcp: tcp_fragment... |
1320 1321 |
if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE) list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); |
1da177e4c Linux-2.6.12-rc2 |
1322 1323 1324 |
return 0; } |
f4d016666 tcp: remove unnec... |
1325 1326 |
/* This is similar to __pskb_pull_tail(). The difference is that pulled * data is not copied, but immediately discarded. |
1da177e4c Linux-2.6.12-rc2 |
1327 |
*/ |
7162fb242 tcp: do not under... |
1328 |
static int __pskb_trim_head(struct sk_buff *skb, int len) |
1da177e4c Linux-2.6.12-rc2 |
1329 |
{ |
7b7fc97aa tcp: optimize som... |
1330 |
struct skb_shared_info *shinfo; |
1da177e4c Linux-2.6.12-rc2 |
1331 |
int i, k, eat; |
4fa48bf3c tcp: fix tcp_trim... |
1332 1333 1334 1335 1336 |
eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); len -= eat; if (!len) |
7162fb242 tcp: do not under... |
1337 |
return 0; |
4fa48bf3c tcp: fix tcp_trim... |
1338 |
} |
1da177e4c Linux-2.6.12-rc2 |
1339 1340 |
eat = len; k = 0; |
7b7fc97aa tcp: optimize som... |
1341 1342 1343 |
shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { int size = skb_frag_size(&shinfo->frags[i]); |
9e903e085 net: add skb frag... |
1344 1345 |
if (size <= eat) { |
aff65da0f net: ipv4: conver... |
1346 |
skb_frag_unref(skb, i); |
9e903e085 net: add skb frag... |
1347 |
eat -= size; |
1da177e4c Linux-2.6.12-rc2 |
1348 |
} else { |
7b7fc97aa tcp: optimize som... |
1349 |
shinfo->frags[k] = shinfo->frags[i]; |
1da177e4c Linux-2.6.12-rc2 |
1350 |
if (eat) { |
b54c9d5bd net: Use skb_frag... |
1351 |
skb_frag_off_add(&shinfo->frags[k], eat); |
7b7fc97aa tcp: optimize som... |
1352 |
skb_frag_size_sub(&shinfo->frags[k], eat); |
1da177e4c Linux-2.6.12-rc2 |
1353 1354 1355 1356 1357 |
eat = 0; } k++; } } |
7b7fc97aa tcp: optimize som... |
1358 |
shinfo->nr_frags = k; |
1da177e4c Linux-2.6.12-rc2 |
1359 |
|
1da177e4c Linux-2.6.12-rc2 |
1360 1361 |
skb->data_len -= len; skb->len = skb->data_len; |
7162fb242 tcp: do not under... |
1362 |
return len; |
1da177e4c Linux-2.6.12-rc2 |
1363 |
} |
67edfef78 TCP: Add comments... |
1364 |
/* Remove acked data from a packet in the transmit queue. */ |
1da177e4c Linux-2.6.12-rc2 |
1365 1366 |
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { |
7162fb242 tcp: do not under... |
1367 |
u32 delta_truesize; |
14bbd6a56 net: Add skb_uncl... |
1368 |
if (skb_unclone(skb, GFP_ATOMIC)) |
1da177e4c Linux-2.6.12-rc2 |
1369 |
return -ENOMEM; |
7162fb242 tcp: do not under... |
1370 |
delta_truesize = __pskb_trim_head(skb, len); |
1da177e4c Linux-2.6.12-rc2 |
1371 1372 |
TCP_SKB_CB(skb)->seq += len; |
84fa7933a [NET]: Replace CH... |
1373 |
skb->ip_summed = CHECKSUM_PARTIAL; |
1da177e4c Linux-2.6.12-rc2 |
1374 |
|
7162fb242 tcp: do not under... |
1375 1376 |
if (delta_truesize) { skb->truesize -= delta_truesize; |
ab4e846a8 tcp: annotate sk-... |
1377 |
sk_wmem_queued_add(sk, -delta_truesize); |
7162fb242 tcp: do not under... |
1378 1379 1380 |
sk_mem_uncharge(sk, delta_truesize); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); } |
1da177e4c Linux-2.6.12-rc2 |
1381 |
|
5b35e1e6e tcp: fix tcp_trim... |
1382 |
/* Any change of skb->len requires recalculation of tso factor. */ |
1da177e4c Linux-2.6.12-rc2 |
1383 |
if (tcp_skb_pcount(skb) > 1) |
5bbb432c8 tcp: tcp_set_skb_... |
1384 |
tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb)); |
1da177e4c Linux-2.6.12-rc2 |
1385 1386 1387 |
return 0; } |
1b63edd6e tcp: fix SYN-data... |
1388 1389 |
/* Calculate MSS not accounting any TCP options. */ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) |
5d424d5a6 [TCP]: MTU probing |
1390 |
{ |
cf533ea53 tcp: add const qu... |
1391 1392 |
const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1393 1394 1395 1396 1397 1398 |
int mss_now; /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); |
674696014 ipv6: RTAX_FEATUR... |
1399 1400 1401 1402 1403 1404 1405 |
/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ if (icsk->icsk_af_ops->net_frag_header_len) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_allfrag(dst)) mss_now -= icsk->icsk_af_ops->net_frag_header_len; } |
5d424d5a6 [TCP]: MTU probing |
1406 1407 1408 1409 1410 1411 1412 1413 |
/* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; /* Now subtract optional transport overhead */ mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ |
5f3e2bf00 tcp: add tcp_min_... |
1414 |
mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); |
5d424d5a6 [TCP]: MTU probing |
1415 1416 |
return mss_now; } |
1b63edd6e tcp: fix SYN-data... |
1417 1418 1419 1420 1421 1422 1423 |
/* Calculate MSS. Not accounting for SACKs here. */ int tcp_mtu_to_mss(struct sock *sk, int pmtu) { /* Subtract TCP options size, not including SACKs */ return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } |
5d424d5a6 [TCP]: MTU probing |
1424 |
/* Inverse of above */ |
674696014 ipv6: RTAX_FEATUR... |
1425 |
int tcp_mss_to_mtu(struct sock *sk, int mss) |
5d424d5a6 [TCP]: MTU probing |
1426 |
{ |
cf533ea53 tcp: add const qu... |
1427 1428 |
const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1429 1430 1431 1432 1433 1434 |
int mtu; mtu = mss + tp->tcp_header_len + icsk->icsk_ext_hdr_len + icsk->icsk_af_ops->net_header_len; |
674696014 ipv6: RTAX_FEATUR... |
1435 1436 1437 1438 1439 1440 1441 |
/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ if (icsk->icsk_af_ops->net_frag_header_len) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_allfrag(dst)) mtu += icsk->icsk_af_ops->net_frag_header_len; } |
5d424d5a6 [TCP]: MTU probing |
1442 1443 |
return mtu; } |
556c6b46d tcp: export tcp_m... |
1444 |
EXPORT_SYMBOL(tcp_mss_to_mtu); |
5d424d5a6 [TCP]: MTU probing |
1445 |
|
67edfef78 TCP: Add comments... |
1446 |
/* MTU probing init per socket */ |
5d424d5a6 [TCP]: MTU probing |
1447 1448 1449 1450 |
void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); |
b0f9ca53c ipv4: Namespecify... |
1451 |
struct net *net = sock_net(sk); |
5d424d5a6 [TCP]: MTU probing |
1452 |
|
b0f9ca53c ipv4: Namespecify... |
1453 |
icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; |
5d424d5a6 [TCP]: MTU probing |
1454 |
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + |
e905a9eda [NET] IPV4: Fix w... |
1455 |
icsk->icsk_af_ops->net_header_len; |
b0f9ca53c ipv4: Namespecify... |
1456 |
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); |
5d424d5a6 [TCP]: MTU probing |
1457 |
icsk->icsk_mtup.probe_size = 0; |
05cbc0db0 ipv4: Create prob... |
1458 |
if (icsk->icsk_mtup.enabled) |
c74df29a8 tcp: use tcp_jiff... |
1459 |
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; |
5d424d5a6 [TCP]: MTU probing |
1460 |
} |
4bc2f18ba net/ipv4: EXPORT_... |
1461 |
EXPORT_SYMBOL(tcp_mtup_init); |
5d424d5a6 [TCP]: MTU probing |
1462 |
|
1da177e4c Linux-2.6.12-rc2 |
1463 1464 1465 1466 1467 1468 |
/* This function synchronize snd mss to current pmtu/exthdr set. tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts for TCP options, but includes only bare TCP header. tp->rx_opt.mss_clamp is mss negotiated at connection setup. |
caa20d9ab [TCP]: spelling f... |
1469 |
It is minimum of user_mss and mss received with SYN. |
1da177e4c Linux-2.6.12-rc2 |
1470 |
It also does not include TCP options. |
d83d8461f [IP_SOCKGLUE]: Re... |
1471 |
inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. |
1da177e4c Linux-2.6.12-rc2 |
1472 1473 1474 1475 1476 1477 1478 1479 |
tp->mss_cache is current effective sending mss, including all tcp options except for SACKs. It is evaluated, taking into account current pmtu, but never exceeds tp->rx_opt.mss_clamp. NOTE1. rfc1122 clearly states that advertised MSS DOES NOT include either tcp or ip options. |
d83d8461f [IP_SOCKGLUE]: Re... |
1480 1481 |
NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache are READ ONLY outside this function. --ANK (980731) |
1da177e4c Linux-2.6.12-rc2 |
1482 |
*/ |
1da177e4c Linux-2.6.12-rc2 |
1483 1484 1485 |
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); |
d83d8461f [IP_SOCKGLUE]: Re... |
1486 |
struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1487 |
int mss_now; |
1da177e4c Linux-2.6.12-rc2 |
1488 |
|
5d424d5a6 [TCP]: MTU probing |
1489 1490 |
if (icsk->icsk_mtup.search_high > pmtu) icsk->icsk_mtup.search_high = pmtu; |
1da177e4c Linux-2.6.12-rc2 |
1491 |
|
5d424d5a6 [TCP]: MTU probing |
1492 |
mss_now = tcp_mtu_to_mss(sk, pmtu); |
409d22b47 [TCP]: Code dupli... |
1493 |
mss_now = tcp_bound_to_half_wnd(tp, mss_now); |
1da177e4c Linux-2.6.12-rc2 |
1494 1495 |
/* And store cached results */ |
d83d8461f [IP_SOCKGLUE]: Re... |
1496 |
icsk->icsk_pmtu_cookie = pmtu; |
5d424d5a6 [TCP]: MTU probing |
1497 1498 |
if (icsk->icsk_mtup.enabled) mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)); |
c1b4a7e69 [TCP]: Move to ne... |
1499 |
tp->mss_cache = mss_now; |
1da177e4c Linux-2.6.12-rc2 |
1500 1501 1502 |
return mss_now; } |
4bc2f18ba net/ipv4: EXPORT_... |
1503 |
EXPORT_SYMBOL(tcp_sync_mss); |
1da177e4c Linux-2.6.12-rc2 |
1504 1505 1506 |
/* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. |
1da177e4c Linux-2.6.12-rc2 |
1507 |
*/ |
0c54b85f2 tcp: simplify tcp... |
1508 |
unsigned int tcp_current_mss(struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
1509 |
{ |
cf533ea53 tcp: add const qu... |
1510 1511 |
const struct tcp_sock *tp = tcp_sk(sk); const struct dst_entry *dst = __sk_dst_get(sk); |
c1b4a7e69 [TCP]: Move to ne... |
1512 |
u32 mss_now; |
95c961747 net: cleanup unsi... |
1513 |
unsigned int header_len; |
33ad798c9 tcp: options clea... |
1514 1515 |
struct tcp_out_options opts; struct tcp_md5sig_key *md5; |
c1b4a7e69 [TCP]: Move to ne... |
1516 1517 |
mss_now = tp->mss_cache; |
1da177e4c Linux-2.6.12-rc2 |
1518 1519 |
if (dst) { u32 mtu = dst_mtu(dst); |
d83d8461f [IP_SOCKGLUE]: Re... |
1520 |
if (mtu != inet_csk(sk)->icsk_pmtu_cookie) |
1da177e4c Linux-2.6.12-rc2 |
1521 1522 |
mss_now = tcp_sync_mss(sk, mtu); } |
33ad798c9 tcp: options clea... |
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 |
header_len = tcp_established_options(sk, NULL, &opts, &md5) + sizeof(struct tcphdr); /* The mss_cache is sized based on tp->tcp_header_len, which assumes * some common options. If this is an odd packet (because we have SACK * blocks etc) then our calculated header_len will be different, and * we have to adjust mss_now correspondingly */ if (header_len != tp->tcp_header_len) { int delta = (int) header_len - tp->tcp_header_len; mss_now -= delta; } |
cfb6eeb4c [TCP]: MD5 Signat... |
1533 |
|
1da177e4c Linux-2.6.12-rc2 |
1534 1535 |
return mss_now; } |
86fd14ad1 tcp: make tcp_cwn... |
1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 |
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. * As additional protections, we do not touch cwnd in retransmission phases, * and if application hit its sndbuf limit recently. */ static void tcp_cwnd_application_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; } tp->snd_cwnd_used = 0; } |
c2203cf75 tcp: use tcp_jiff... |
1555 |
tp->snd_cwnd_stamp = tcp_jiffies32; |
86fd14ad1 tcp: make tcp_cwn... |
1556 |
} |
ca8a22634 tcp: make cwnd-li... |
1557 |
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) |
a762a9800 [TCP]: Kill extra... |
1558 |
{ |
1b1fc3fdd tcp: make congest... |
1559 |
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; |
9e412ba76 [TCP]: Sed magic ... |
1560 |
struct tcp_sock *tp = tcp_sk(sk); |
a762a9800 [TCP]: Kill extra... |
1561 |
|
ca8a22634 tcp: make cwnd-li... |
1562 1563 1564 1565 1566 1567 1568 1569 1570 |
/* Track the maximum number of outstanding packets in each * window, and remember whether we were cwnd-limited then. */ if (!before(tp->snd_una, tp->max_packets_seq) || tp->packets_out > tp->max_packets_out) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; } |
e114a710a tcp: fix cwnd lim... |
1571 |
|
249015515 tcp: remove in_fl... |
1572 |
if (tcp_is_cwnd_limited(sk)) { |
a762a9800 [TCP]: Kill extra... |
1573 1574 |
/* Network is feed fully. */ tp->snd_cwnd_used = 0; |
c2203cf75 tcp: use tcp_jiff... |
1575 |
tp->snd_cwnd_stamp = tcp_jiffies32; |
a762a9800 [TCP]: Kill extra... |
1576 1577 1578 1579 |
} else { /* Network starves. */ if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; |
b510f0d23 tcp: Namespace-if... |
1580 |
if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && |
c2203cf75 tcp: use tcp_jiff... |
1581 |
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && |
1b1fc3fdd tcp: make congest... |
1582 |
!ca_ops->cong_control) |
a762a9800 [TCP]: Kill extra... |
1583 |
tcp_cwnd_application_limited(sk); |
b0f71bd3e tcp: instrument h... |
1584 1585 1586 1587 1588 |
/* The following conditions together indicate the starvation * is caused by insufficient sender buffer: * 1) just sent some data (see tcp_write_xmit) * 2) not cwnd limited (this else condition) |
75c119afe tcp: implement rb... |
1589 |
* 3) no more data to send (tcp_write_queue_empty()) |
b0f71bd3e tcp: instrument h... |
1590 1591 |
* 4) application is hitting buffer limit (SOCK_NOSPACE) */ |
75c119afe tcp: implement rb... |
1592 |
if (tcp_write_queue_empty(sk) && sk->sk_socket && |
b0f71bd3e tcp: instrument h... |
1593 1594 1595 |
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); |
a762a9800 [TCP]: Kill extra... |
1596 1597 |
} } |
d4589926d tcp: refine TSO s... |
1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 |
/* Minshall's variant of the Nagle send check. */ static bool tcp_minshall_check(const struct tcp_sock *tp) { return after(tp->snd_sml, tp->snd_una) && !after(tp->snd_sml, tp->snd_nxt); } /* Update snd_sml if this skb is under mss * Note that a TSO packet might end with a sub-mss segment * The test is really : * if ((skb->len % mss) != 0) * tp->snd_sml = TCP_SKB_CB(skb)->end_seq; * But we can avoid doing the divide again given we already have * skb_pcount = skb->len / mss_now |
0e3a4803a [TCP]: Force TSO ... |
1612 |
*/ |
d4589926d tcp: refine TSO s... |
1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 |
static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, const struct sk_buff *skb) { if (skb->len < tcp_skb_pcount(skb) * mss_now) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } /* Return false, if packet can be sent now without violation Nagle's rules: * 1. It is full sized. (provided by caller in %partial bool) * 2. Or it contains FIN. (already checked by caller) * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. * 4. Or TCP_CORK is not set, and all sent packets are ACKed. * With Minshall's modification: all sent small packets are ACKed. */ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, |
cc93fc51f tcp: delete unuse... |
1628 |
int nonagle) |
d4589926d tcp: refine TSO s... |
1629 1630 1631 1632 1633 |
{ return partial && ((nonagle & TCP_NAGLE_CORK) || (!nonagle && tp->packets_out && tcp_minshall_check(tp))); } |
605ad7f18 tcp: refine TSO a... |
1634 1635 1636 1637 |
/* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ |
dcb8c9b43 tcp_bbr: better d... |
1638 1639 |
static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs) |
605ad7f18 tcp: refine TSO a... |
1640 1641 |
{ u32 bytes, segs; |
76a9ebe81 net: extend sk_pa... |
1642 |
bytes = min_t(unsigned long, |
8f8e806c5 net: annotate loc... |
1643 |
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), |
76a9ebe81 net: extend sk_pa... |
1644 |
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); |
605ad7f18 tcp: refine TSO a... |
1645 1646 1647 1648 1649 1650 |
/* Goal is to send at least one packet per ms, * not one big TSO packet every 100 ms. * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ |
1b3878ca1 tcp: export tcp_t... |
1651 |
segs = max_t(u32, bytes / mss_now, min_tso_segs); |
605ad7f18 tcp: refine TSO a... |
1652 |
|
350c9f484 tcp_bbr: better d... |
1653 |
return segs; |
605ad7f18 tcp: refine TSO a... |
1654 |
} |
ed6e7268b tcp: allow conges... |
1655 1656 1657 1658 1659 1660 |
/* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. */ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; |
dcb8c9b43 tcp_bbr: better d... |
1661 |
u32 min_tso, tso_segs; |
ed6e7268b tcp: allow conges... |
1662 |
|
dcb8c9b43 tcp_bbr: better d... |
1663 1664 1665 1666 1667 |
min_tso = ca_ops->min_tso_segs ? ca_ops->min_tso_segs(sk) : sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); |
350c9f484 tcp_bbr: better d... |
1668 |
return min_t(u32, tso_segs, sk->sk_gso_max_segs); |
ed6e7268b tcp: allow conges... |
1669 |
} |
d4589926d tcp: refine TSO s... |
1670 1671 1672 1673 1674 1675 |
/* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, unsigned int mss_now, unsigned int max_segs, int nonagle) |
c1b4a7e69 [TCP]: Move to ne... |
1676 |
{ |
cf533ea53 tcp: add const qu... |
1677 |
const struct tcp_sock *tp = tcp_sk(sk); |
d4589926d tcp: refine TSO s... |
1678 |
u32 partial, needed, window, max_len; |
c1b4a7e69 [TCP]: Move to ne... |
1679 |
|
90840defa [TCP]: Introduce ... |
1680 |
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
1485348d2 tcp: Apply device... |
1681 |
max_len = mss_now * max_segs; |
0e3a4803a [TCP]: Force TSO ... |
1682 |
|
1485348d2 tcp: Apply device... |
1683 1684 |
if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) return max_len; |
0e3a4803a [TCP]: Force TSO ... |
1685 |
|
5ea3a7480 [TCP]: Prevent se... |
1686 |
needed = min(skb->len, window); |
1485348d2 tcp: Apply device... |
1687 1688 |
if (max_len <= needed) return max_len; |
0e3a4803a [TCP]: Force TSO ... |
1689 |
|
d4589926d tcp: refine TSO s... |
1690 1691 1692 1693 1694 |
partial = needed % mss_now; /* If last segment is not a full MSS, check if Nagle rules allow us * to include this last segment in this skb. * Otherwise, we'll split the skb at last MSS boundary */ |
cc93fc51f tcp: delete unuse... |
1695 |
if (tcp_nagle_check(partial != 0, tp, nonagle)) |
d4589926d tcp: refine TSO s... |
1696 1697 1698 |
return needed - partial; return needed; |
c1b4a7e69 [TCP]: Move to ne... |
1699 1700 1701 1702 1703 |
} /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ |
cf533ea53 tcp: add const qu... |
1704 1705 |
static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb) |
c1b4a7e69 [TCP]: Move to ne... |
1706 |
{ |
d649a7a81 tcp: limit GSO pa... |
1707 |
u32 in_flight, cwnd, halfcwnd; |
c1b4a7e69 [TCP]: Move to ne... |
1708 1709 |
/* Don't be strict about the congestion window for the final FIN. */ |
4de075e04 tcp: rename tcp_s... |
1710 1711 |
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) |
c1b4a7e69 [TCP]: Move to ne... |
1712 1713 1714 1715 |
return 1; in_flight = tcp_packets_in_flight(tp); cwnd = tp->snd_cwnd; |
d649a7a81 tcp: limit GSO pa... |
1716 1717 |
if (in_flight >= cwnd) return 0; |
c1b4a7e69 [TCP]: Move to ne... |
1718 |
|
d649a7a81 tcp: limit GSO pa... |
1719 1720 1721 1722 1723 |
/* For better scheduling, ensure we have at least * 2 GSO packets in flight. */ halfcwnd = max(cwnd >> 1, 1U); return min(halfcwnd, cwnd - in_flight); |
c1b4a7e69 [TCP]: Move to ne... |
1724 |
} |
b595076a1 tree-wide: fix co... |
1725 |
/* Initialize TSO state of a skb. |
67edfef78 TCP: Add comments... |
1726 |
* This must be invoked the first time we consider transmitting |
c1b4a7e69 [TCP]: Move to ne... |
1727 1728 |
* SKB onto the wire. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1729 |
static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) |
c1b4a7e69 [TCP]: Move to ne... |
1730 1731 |
{ int tso_segs = tcp_skb_pcount(skb); |
f8269a495 tcp: make urg+gso... |
1732 |
if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { |
5bbb432c8 tcp: tcp_set_skb_... |
1733 |
tcp_set_skb_tso_segs(skb, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
1734 1735 1736 1737 |
tso_segs = tcp_skb_pcount(skb); } return tso_segs; } |
c1b4a7e69 [TCP]: Move to ne... |
1738 |
|
a2a385d62 tcp: bool convers... |
1739 |
/* Return true if the Nagle test allows this packet to be |
c1b4a7e69 [TCP]: Move to ne... |
1740 1741 |
* sent now. */ |
a2a385d62 tcp: bool convers... |
1742 1743 |
static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss, int nonagle) |
c1b4a7e69 [TCP]: Move to ne... |
1744 1745 1746 1747 1748 1749 1750 1751 |
{ /* Nagle rule does not apply to frames, which sit in the middle of the * write_queue (they have no chances to get new data). * * This is implemented in the callers, where they modify the 'nonagle' * argument based upon the location of SKB in the send queue. */ if (nonagle & TCP_NAGLE_PUSH) |
a2a385d62 tcp: bool convers... |
1752 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1753 |
|
9b44190dc tcp: refactor F-RTO |
1754 1755 |
/* Don't use the nagle rule for urgent data (or for the final FIN). */ if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) |
a2a385d62 tcp: bool convers... |
1756 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1757 |
|
cc93fc51f tcp: delete unuse... |
1758 |
if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle)) |
a2a385d62 tcp: bool convers... |
1759 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1760 |
|
a2a385d62 tcp: bool convers... |
1761 |
return false; |
c1b4a7e69 [TCP]: Move to ne... |
1762 1763 1764 |
} /* Does at least the first segment of SKB fit into the send window? */ |
a2a385d62 tcp: bool convers... |
1765 1766 1767 |
static bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss) |
c1b4a7e69 [TCP]: Move to ne... |
1768 1769 1770 1771 1772 |
{ u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (skb->len > cur_mss) end_seq = TCP_SKB_CB(skb)->seq + cur_mss; |
90840defa [TCP]: Introduce ... |
1773 |
return !after(end_seq, tcp_wnd_end(tp)); |
c1b4a7e69 [TCP]: Move to ne... |
1774 |
} |
c1b4a7e69 [TCP]: Move to ne... |
1775 1776 1777 1778 1779 1780 1781 |
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet * which is put after SKB on the list. It is very much like * tcp_fragment() except that it may make several kinds of assumptions * in order to speed up the splitting operation. In particular, we * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ |
564833419 tcp: remove tcp_q... |
1782 |
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, |
c4ead4c59 tcp: tso_fragment... |
1783 |
unsigned int mss_now, gfp_t gfp) |
c1b4a7e69 [TCP]: Move to ne... |
1784 |
{ |
c1b4a7e69 [TCP]: Move to ne... |
1785 |
int nlen = skb->len - len; |
564833419 tcp: remove tcp_q... |
1786 |
struct sk_buff *buff; |
9ce014610 tcp: get rid of t... |
1787 |
u8 flags; |
c1b4a7e69 [TCP]: Move to ne... |
1788 1789 |
/* All of a TSO frame must be composed of paged data. */ |
c8ac37746 [TCP]: Fix bug #5... |
1790 |
if (skb->len != skb->data_len) |
564833419 tcp: remove tcp_q... |
1791 1792 |
return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, skb, len, mss_now, gfp); |
c1b4a7e69 [TCP]: Move to ne... |
1793 |
|
eb9344781 tcp: add a force_... |
1794 |
buff = sk_stream_alloc_skb(sk, 0, gfp, true); |
51456b291 ipv4: coding styl... |
1795 |
if (unlikely(!buff)) |
c1b4a7e69 [TCP]: Move to ne... |
1796 |
return -ENOMEM; |
414776621 net/tls: prevent ... |
1797 |
skb_copy_decrypted(buff, skb); |
c1b4a7e69 [TCP]: Move to ne... |
1798 |
|
ab4e846a8 tcp: annotate sk-... |
1799 |
sk_wmem_queued_add(sk, buff->truesize); |
3ab224be6 [NET] CORE: Intro... |
1800 |
sk_mem_charge(sk, buff->truesize); |
b60b49ea6 [TCP]: Account sk... |
1801 |
buff->truesize += nlen; |
c1b4a7e69 [TCP]: Move to ne... |
1802 1803 1804 1805 1806 1807 1808 1809 |
skb->truesize -= nlen; /* Correct the sequence numbers. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ |
4de075e04 tcp: rename tcp_s... |
1810 1811 1812 |
flags = TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; |
c1b4a7e69 [TCP]: Move to ne... |
1813 1814 1815 |
/* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; |
a166140e8 tcp: Handle eor b... |
1816 |
tcp_skb_fragment_eor(skb, buff); |
98be9b120 tcp: remove dead ... |
1817 |
buff->ip_summed = CHECKSUM_PARTIAL; |
c1b4a7e69 [TCP]: Move to ne... |
1818 |
skb_split(skb, buff, len); |
490cc7d03 net-timestamp: fi... |
1819 |
tcp_fragment_tstamp(skb, buff); |
c1b4a7e69 [TCP]: Move to ne... |
1820 1821 |
/* Fix up tso_factor for both original and new SKB. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1822 1823 |
tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
1824 1825 |
/* Link BUFF into the send queue. */ |
f4a775d14 net: introduce __... |
1826 |
__skb_header_release(buff); |
564833419 tcp: remove tcp_q... |
1827 |
tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE); |
c1b4a7e69 [TCP]: Move to ne... |
1828 1829 1830 1831 1832 1833 1834 1835 1836 |
return 0; } /* Try to defer sending, if possible, in order to minimize the amount * of TSO splitting we do. View it as a kind of TSO Nagle test. * * This algorithm is from John Heffner. */ |
ca8a22634 tcp: make cwnd-li... |
1837 |
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, |
f9bfe4e6a tcp: lack of avai... |
1838 1839 1840 |
bool *is_cwnd_limited, bool *is_rwnd_limited, u32 max_segs) |
c1b4a7e69 [TCP]: Move to ne... |
1841 |
{ |
6687e988d [ICSK]: Move TCP ... |
1842 |
const struct inet_connection_sock *icsk = inet_csk(sk); |
f1c6ea382 tcp: refine tcp_t... |
1843 |
u32 send_win, cong_win, limit, in_flight; |
50c8339e9 tcp: tso: restore... |
1844 |
struct tcp_sock *tp = tcp_sk(sk); |
50c8339e9 tcp: tso: restore... |
1845 |
struct sk_buff *head; |
ad9f4f50f tcp: avoid a poss... |
1846 |
int win_divisor; |
f1c6ea382 tcp: refine tcp_t... |
1847 |
s64 delta; |
c1b4a7e69 [TCP]: Move to ne... |
1848 |
|
99d7662a0 tcp: tso: allow d... |
1849 |
if (icsk->icsk_ca_state >= TCP_CA_Recovery) |
ae8064ac3 [TCP]: Bound TSO ... |
1850 |
goto send_now; |
5f852eb53 tcp: tso: remove ... |
1851 |
/* Avoid bursty behavior by allowing defer |
a682850a1 tcp: get rid of t... |
1852 1853 1854 |
* only if the last write was recent (1 ms). * Note that tp->tcp_wstamp_ns can be in the future if we have * packets waiting in a qdisc or device for EDT delivery. |
5f852eb53 tcp: tso: remove ... |
1855 |
*/ |
a682850a1 tcp: get rid of t... |
1856 1857 |
delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC; if (delta > 0) |
ae8064ac3 [TCP]: Bound TSO ... |
1858 |
goto send_now; |
908a75c17 [TCP]: Never TSO ... |
1859 |
|
c1b4a7e69 [TCP]: Move to ne... |
1860 |
in_flight = tcp_packets_in_flight(tp); |
c8c9aeb51 tcp: Split BUG_ON... |
1861 1862 |
BUG_ON(tcp_skb_pcount(skb) <= 1); BUG_ON(tp->snd_cwnd <= in_flight); |
c1b4a7e69 [TCP]: Move to ne... |
1863 |
|
90840defa [TCP]: Introduce ... |
1864 |
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
c1b4a7e69 [TCP]: Move to ne... |
1865 1866 1867 1868 1869 |
/* From in_flight test above, we know that cwnd > in_flight. */ cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; limit = min(send_win, cong_win); |
ba244fe90 [TCP]: Fix tcp_ts... |
1870 |
/* If a full-sized TSO skb can be sent, do it. */ |
605ad7f18 tcp: refine TSO a... |
1871 |
if (limit >= max_segs * tp->mss_cache) |
ae8064ac3 [TCP]: Bound TSO ... |
1872 |
goto send_now; |
ba244fe90 [TCP]: Fix tcp_ts... |
1873 |
|
62ad27619 tcp: deferring in... |
1874 1875 1876 |
/* Middle in queue won't get any more data, full sendable already? */ if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; |
5bbcc0f59 Merge git://git.k... |
1877 |
win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); |
ad9f4f50f tcp: avoid a poss... |
1878 |
if (win_divisor) { |
c1b4a7e69 [TCP]: Move to ne... |
1879 1880 1881 1882 1883 |
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. */ |
ad9f4f50f tcp: avoid a poss... |
1884 |
chunk /= win_divisor; |
c1b4a7e69 [TCP]: Move to ne... |
1885 |
if (limit >= chunk) |
ae8064ac3 [TCP]: Bound TSO ... |
1886 |
goto send_now; |
c1b4a7e69 [TCP]: Move to ne... |
1887 1888 1889 1890 1891 1892 |
} else { /* Different approach, try not to defer past a single * ACK. Receiver should ACK every other full sized * frame, so if we have space for more than 3 frames * then send now. */ |
6b5a5c0db tcp: do not scale... |
1893 |
if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) |
ae8064ac3 [TCP]: Bound TSO ... |
1894 |
goto send_now; |
c1b4a7e69 [TCP]: Move to ne... |
1895 |
} |
75c119afe tcp: implement rb... |
1896 1897 1898 1899 |
/* TODO : use tsorted_sent_queue ? */ head = tcp_rtx_queue_head(sk); if (!head) goto send_now; |
f1c6ea382 tcp: refine tcp_t... |
1900 |
delta = tp->tcp_clock_cache - head->tstamp; |
50c8339e9 tcp: tso: restore... |
1901 |
/* If next ACK is likely to come too late (half srtt), do not defer */ |
f1c6ea382 tcp: refine tcp_t... |
1902 |
if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0) |
50c8339e9 tcp: tso: restore... |
1903 |
goto send_now; |
f9bfe4e6a tcp: lack of avai... |
1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 |
/* Ok, it looks like it is advisable to defer. * Three cases are tracked : * 1) We are cwnd-limited * 2) We are rwnd-limited * 3) We are application limited. */ if (cong_win < send_win) { if (cong_win <= skb->len) { *is_cwnd_limited = true; return true; } } else { if (send_win <= skb->len) { *is_rwnd_limited = true; return true; } } |
ae8064ac3 [TCP]: Bound TSO ... |
1921 |
|
f9bfe4e6a tcp: lack of avai... |
1922 |
/* If this packet won't get more data, do not wait. */ |
d8ed257f3 tcp: handle EOR a... |
1923 1924 |
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || TCP_SKB_CB(skb)->eor) |
f9bfe4e6a tcp: lack of avai... |
1925 |
goto send_now; |
ca8a22634 tcp: make cwnd-li... |
1926 |
|
a2a385d62 tcp: bool convers... |
1927 |
return true; |
ae8064ac3 [TCP]: Bound TSO ... |
1928 1929 |
send_now: |
a2a385d62 tcp: bool convers... |
1930 |
return false; |
c1b4a7e69 [TCP]: Move to ne... |
1931 |
} |
05cbc0db0 ipv4: Create prob... |
1932 1933 1934 1935 1936 1937 1938 1939 1940 |
static inline void tcp_mtu_check_reprobe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); u32 interval; s32 delta; interval = net->ipv4.sysctl_tcp_probe_interval; |
c74df29a8 tcp: use tcp_jiff... |
1941 |
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; |
05cbc0db0 ipv4: Create prob... |
1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 |
if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); /* Update current search range */ icsk->icsk_mtup.probe_size = 0; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); /* Update probe time stamp */ |
c74df29a8 tcp: use tcp_jiff... |
1953 |
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; |
05cbc0db0 ipv4: Create prob... |
1954 1955 |
} } |
808cf9e38 tcp: Honor the eo... |
1956 1957 1958 1959 1960 1961 1962 1963 |
static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) { struct sk_buff *skb, *next; skb = tcp_send_head(sk); tcp_for_write_queue_from_safe(skb, next, sk) { if (len <= skb->len) break; |
888a5c53c tcp: inherit time... |
1964 |
if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) |
808cf9e38 tcp: Honor the eo... |
1965 1966 1967 1968 1969 1970 1971 |
return false; len -= skb->len; } return true; } |
5d424d5a6 [TCP]: MTU probing |
1972 |
/* Create a new MTU probe if we are ready. |
67edfef78 TCP: Add comments... |
1973 1974 1975 1976 |
* MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing * changes resulting in larger path MTUs. * |
5d424d5a6 [TCP]: MTU probing |
1977 1978 |
* Returns 0 if we should wait to probe (no cwnd available), * 1 if a probe was sent, |
056834d9f [TCP]: cleanup tc... |
1979 1980 |
* -1 otherwise */ |
5d424d5a6 [TCP]: MTU probing |
1981 1982 |
static int tcp_mtu_probe(struct sock *sk) { |
5d424d5a6 [TCP]: MTU probing |
1983 |
struct inet_connection_sock *icsk = inet_csk(sk); |
12a59abc2 tcp: tcp_mtu_prob... |
1984 |
struct tcp_sock *tp = tcp_sk(sk); |
5d424d5a6 [TCP]: MTU probing |
1985 |
struct sk_buff *skb, *nskb, *next; |
6b58e0a5f ipv4: Use binary ... |
1986 |
struct net *net = sock_net(sk); |
5d424d5a6 [TCP]: MTU probing |
1987 |
int probe_size; |
91cc17c0e [TCP]: MTUprobe: ... |
1988 |
int size_needed; |
12a59abc2 tcp: tcp_mtu_prob... |
1989 |
int copy, len; |
5d424d5a6 [TCP]: MTU probing |
1990 |
int mss_now; |
6b58e0a5f ipv4: Use binary ... |
1991 |
int interval; |
5d424d5a6 [TCP]: MTU probing |
1992 1993 1994 1995 |
/* Not currently probing/verifying, * not in recovery, * have enough cwnd, and |
12a59abc2 tcp: tcp_mtu_prob... |
1996 1997 1998 1999 2000 2001 2002 |
* not SACKing (the variable headers throw things off) */ if (likely(!icsk->icsk_mtup.enabled || icsk->icsk_mtup.probe_size || inet_csk(sk)->icsk_ca_state != TCP_CA_Open || tp->snd_cwnd < 11 || tp->rx_opt.num_sacks || tp->rx_opt.dsack)) |
5d424d5a6 [TCP]: MTU probing |
2003 |
return -1; |
6b58e0a5f ipv4: Use binary ... |
2004 2005 2006 2007 |
/* Use binary search for probe_size between tcp_mss_base, * and current mss_clamp. if (search_high - search_low) * smaller than a threshold, backoff from probing. */ |
0c54b85f2 tcp: simplify tcp... |
2008 |
mss_now = tcp_current_mss(sk); |
6b58e0a5f ipv4: Use binary ... |
2009 2010 |
probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high + icsk->icsk_mtup.search_low) >> 1); |
91cc17c0e [TCP]: MTUprobe: ... |
2011 |
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; |
6b58e0a5f ipv4: Use binary ... |
2012 |
interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; |
05cbc0db0 ipv4: Create prob... |
2013 2014 2015 2016 |
/* When misfortune happens, we are reprobing actively, * and then reprobe timer has expired. We stick with current * probing process by not resetting search range to its orignal. */ |
6b58e0a5f ipv4: Use binary ... |
2017 |
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || |
05cbc0db0 ipv4: Create prob... |
2018 2019 2020 2021 2022 |
interval < net->ipv4.sysctl_tcp_probe_threshold) { /* Check whether enough time has elaplased for * another round of probing. */ tcp_mtu_check_reprobe(sk); |
5d424d5a6 [TCP]: MTU probing |
2023 2024 2025 2026 |
return -1; } /* Have enough data in the send queue to probe? */ |
7f9c33e51 [TCP] MTUprobe: C... |
2027 |
if (tp->write_seq - tp->snd_nxt < size_needed) |
5d424d5a6 [TCP]: MTU probing |
2028 |
return -1; |
91cc17c0e [TCP]: MTUprobe: ... |
2029 2030 |
if (tp->snd_wnd < size_needed) return -1; |
90840defa [TCP]: Introduce ... |
2031 |
if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp))) |
91cc17c0e [TCP]: MTUprobe: ... |
2032 |
return 0; |
5d424d5a6 [TCP]: MTU probing |
2033 |
|
d67c58e9a [TCP]: Remove loc... |
2034 2035 2036 |
/* Do we need to wait to drain cwnd? With none in flight, don't stall */ if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) { if (!tcp_packets_in_flight(tp)) |
5d424d5a6 [TCP]: MTU probing |
2037 2038 2039 2040 |
return -1; else return 0; } |
808cf9e38 tcp: Honor the eo... |
2041 2042 |
if (!tcp_can_coalesce_send_queue_head(sk, probe_size)) return -1; |
5d424d5a6 [TCP]: MTU probing |
2043 |
/* We're allowed to probe. Build it now. */ |
eb9344781 tcp: add a force_... |
2044 |
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); |
51456b291 ipv4: coding styl... |
2045 |
if (!nskb) |
5d424d5a6 [TCP]: MTU probing |
2046 |
return -1; |
ab4e846a8 tcp: annotate sk-... |
2047 |
sk_wmem_queued_add(sk, nskb->truesize); |
3ab224be6 [NET] CORE: Intro... |
2048 |
sk_mem_charge(sk, nskb->truesize); |
5d424d5a6 [TCP]: MTU probing |
2049 |
|
fe067e8ab [TCP]: Abstract o... |
2050 |
skb = tcp_send_head(sk); |
414776621 net/tls: prevent ... |
2051 |
skb_copy_decrypted(nskb, skb); |
5d424d5a6 [TCP]: MTU probing |
2052 2053 2054 |
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; |
4de075e04 tcp: rename tcp_s... |
2055 |
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; |
5d424d5a6 [TCP]: MTU probing |
2056 2057 |
TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; |
98be9b120 tcp: remove dead ... |
2058 |
nskb->ip_summed = CHECKSUM_PARTIAL; |
5d424d5a6 [TCP]: MTU probing |
2059 |
|
50c4817e9 [TCP]: MTUprobe: ... |
2060 |
tcp_insert_write_queue_before(nskb, skb, sk); |
2b7cda9c3 tcp: fix tcp_mtu_... |
2061 |
tcp_highest_sack_replace(sk, skb, nskb); |
50c4817e9 [TCP]: MTUprobe: ... |
2062 |
|
5d424d5a6 [TCP]: MTU probing |
2063 |
len = 0; |
234b68607 [TCP]: Add tcp_fo... |
2064 |
tcp_for_write_queue_from_safe(skb, next, sk) { |
5d424d5a6 [TCP]: MTU probing |
2065 |
copy = min_t(int, skb->len, probe_size - len); |
98be9b120 tcp: remove dead ... |
2066 |
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); |
5d424d5a6 [TCP]: MTU probing |
2067 2068 2069 2070 |
if (skb->len <= copy) { /* We've eaten all the data from this skb. * Throw it away. */ |
4de075e04 tcp: rename tcp_s... |
2071 |
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; |
808cf9e38 tcp: Honor the eo... |
2072 2073 2074 2075 |
/* If this is the last SKB we copy and eor is set * we need to propagate it to the new skb. */ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; |
888a5c53c tcp: inherit time... |
2076 |
tcp_skb_collapse_tstamp(nskb, skb); |
fe067e8ab [TCP]: Abstract o... |
2077 |
tcp_unlink_write_queue(skb, sk); |
3ab224be6 [NET] CORE: Intro... |
2078 |
sk_wmem_free_skb(sk, skb); |
5d424d5a6 [TCP]: MTU probing |
2079 |
} else { |
4de075e04 tcp: rename tcp_s... |
2080 |
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & |
a3433f35a tcp: unify tcp fl... |
2081 |
~(TCPHDR_FIN|TCPHDR_PSH); |
5d424d5a6 [TCP]: MTU probing |
2082 2083 |
if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); |
5d424d5a6 [TCP]: MTU probing |
2084 2085 |
} else { __pskb_trim_head(skb, copy); |
5bbb432c8 tcp: tcp_set_skb_... |
2086 |
tcp_set_skb_tso_segs(skb, mss_now); |
5d424d5a6 [TCP]: MTU probing |
2087 2088 2089 2090 2091 |
} TCP_SKB_CB(skb)->seq += copy; } len += copy; |
234b68607 [TCP]: Add tcp_fo... |
2092 2093 2094 |
if (len >= probe_size) break; |
5d424d5a6 [TCP]: MTU probing |
2095 |
} |
5bbb432c8 tcp: tcp_set_skb_... |
2096 |
tcp_init_tso_segs(nskb, nskb->len); |
5d424d5a6 [TCP]: MTU probing |
2097 2098 |
/* We're ready to send. If this fails, the probe will |
7faee5c0d tcp: remove TCP_S... |
2099 2100 |
* be resegmented into mss-sized pieces by tcp_write_xmit(). */ |
5d424d5a6 [TCP]: MTU probing |
2101 2102 |
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending |
056834d9f [TCP]: cleanup tc... |
2103 |
* effectively two packets. */ |
5d424d5a6 [TCP]: MTU probing |
2104 |
tp->snd_cwnd--; |
66f5fe624 [TCP]: Rename upd... |
2105 |
tcp_event_new_data_sent(sk, nskb); |
5d424d5a6 [TCP]: MTU probing |
2106 2107 |
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); |
0e7b13685 [TCP] mtu probing... |
2108 2109 |
tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; |
5d424d5a6 [TCP]: MTU probing |
2110 2111 2112 2113 2114 2115 |
return 1; } return -1; } |
864e5c090 tcp: optimize tcp... |
2116 |
static bool tcp_pacing_check(struct sock *sk) |
218af599f tcp: internal imp... |
2117 |
{ |
864e5c090 tcp: optimize tcp... |
2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 |
struct tcp_sock *tp = tcp_sk(sk); if (!tcp_needs_internal_pacing(sk)) return false; if (tp->tcp_wstamp_ns <= tp->tcp_clock_cache) return false; if (!hrtimer_is_queued(&tp->pacing_timer)) { hrtimer_start(&tp->pacing_timer, ns_to_ktime(tp->tcp_wstamp_ns), HRTIMER_MODE_ABS_PINNED_SOFT); sock_hold(sk); } return true; |
218af599f tcp: internal imp... |
2133 |
} |
f9616c35a tcp: implement TS... |
2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 |
/* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * (These limits are doubled for retransmits) * This allows for : * - better RTT estimation and ACK scheduling * - faster recovery * - high rates * Alas, some drivers / subsystems require a fair amount * of queued bytes to ensure line rate. * One example is wifi aggregation (802.11 AMPDU) */ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, unsigned int factor) { |
76a9ebe81 net: extend sk_pa... |
2148 |
unsigned long limit; |
f9616c35a tcp: implement TS... |
2149 |
|
76a9ebe81 net: extend sk_pa... |
2150 2151 |
limit = max_t(unsigned long, 2 * skb->truesize, |
8f8e806c5 net: annotate loc... |
2152 |
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); |
c73e5807e tcp: tsq: no long... |
2153 2154 2155 |
if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); |
f9616c35a tcp: implement TS... |
2156 |
limit <<= factor; |
a842fe142 tcp: add optional... |
2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 |
if (static_branch_unlikely(&tcp_tx_delay_enabled) && tcp_sk(sk)->tcp_tx_delay) { u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we * approximate our needs assuming an ~100% skb->truesize overhead. * USEC_PER_SEC is approximated by 2^20. * do_div(extra_bytes, USEC_PER_SEC/2) is replaced by a right shift. */ extra_bytes >>= (20 - 1); limit += extra_bytes; } |
14afee4b6 net: convert sock... |
2169 |
if (refcount_read(&sk->sk_wmem_alloc) > limit) { |
75c119afe tcp: implement rb... |
2170 |
/* Always send skb if rtx queue is empty. |
75eefc6c5 tcp: tsq: add a s... |
2171 2172 2173 2174 |
* No need to wait for TX completion to call us back, * after softirq/tasklet schedule. * This helps when TX completions are delayed too much. */ |
75c119afe tcp: implement rb... |
2175 |
if (tcp_rtx_queue_empty(sk)) |
75eefc6c5 tcp: tsq: add a s... |
2176 |
return false; |
7aa5470c2 tcp: tsq: move ts... |
2177 |
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); |
f9616c35a tcp: implement TS... |
2178 2179 2180 2181 2182 |
/* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must * test again the condition. */ smp_mb__after_atomic(); |
14afee4b6 net: convert sock... |
2183 |
if (refcount_read(&sk->sk_wmem_alloc) > limit) |
f9616c35a tcp: implement TS... |
2184 2185 2186 2187 |
return true; } return false; } |
05b055e89 tcp: instrument t... |
2188 2189 |
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { |
628174ccc tcp: uses jiffies... |
2190 |
const u32 now = tcp_jiffies32; |
efe967cde tcp: avoid bogus ... |
2191 |
enum tcp_chrono old = tp->chrono_type; |
05b055e89 tcp: instrument t... |
2192 |
|
efe967cde tcp: avoid bogus ... |
2193 2194 |
if (old > TCP_CHRONO_UNSPEC) tp->chrono_stat[old - 1] += now - tp->chrono_start; |
05b055e89 tcp: instrument t... |
2195 2196 2197 2198 2199 2200 2201 2202 2203 |
tp->chrono_start = now; tp->chrono_type = new; } void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); /* If there are multiple conditions worthy of tracking in a |
0f87230d1 tcp: instrument h... |
2204 2205 |
* chronograph then the highest priority enum takes precedence * over the other conditions. So that if something "more interesting" |
05b055e89 tcp: instrument t... |
2206 2207 2208 2209 2210 2211 2212 2213 2214 |
* starts happening, stop the previous chrono and start a new one. */ if (type > tp->chrono_type) tcp_chrono_set(tp, type); } void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); |
0f87230d1 tcp: instrument h... |
2215 2216 2217 2218 2219 2220 2221 2222 |
/* There are multiple conditions worthy of tracking in a * chronograph, so that the highest priority enum takes * precedence over the other conditions (see tcp_chrono_start). * If a condition stops, we only stop chrono tracking if * it's the "most interesting" or current chrono we are * tracking and starts busy chrono if we have pending data. */ |
75c119afe tcp: implement rb... |
2223 |
if (tcp_rtx_and_write_queues_empty(sk)) |
0f87230d1 tcp: instrument h... |
2224 2225 2226 |
tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); else if (type == tp->chrono_type) tcp_chrono_set(tp, TCP_CHRONO_BUSY); |
05b055e89 tcp: instrument t... |
2227 |
} |
1da177e4c Linux-2.6.12-rc2 |
2228 2229 2230 2231 |
/* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. * |
f8269a495 tcp: make urg+gso... |
2232 2233 2234 2235 |
* LARGESEND note: !tcp_urg_mode is overkill, only frames between * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * |
6ba8a3b19 tcp: Tail loss pr... |
2236 2237 |
* Send at most one packet when push_one > 0. Temporarily ignore * cwnd limit to force at most one packet out when push_one == 2. |
a2a385d62 tcp: bool convers... |
2238 2239 |
* Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. |
1da177e4c Linux-2.6.12-rc2 |
2240 |
*/ |
a2a385d62 tcp: bool convers... |
2241 2242 |
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp) |
1da177e4c Linux-2.6.12-rc2 |
2243 2244 |
{ struct tcp_sock *tp = tcp_sk(sk); |
92df7b518 [TCP]: tcp_write_... |
2245 |
struct sk_buff *skb; |
c1b4a7e69 [TCP]: Move to ne... |
2246 2247 |
unsigned int tso_segs, sent_pkts; int cwnd_quota; |
5d424d5a6 [TCP]: MTU probing |
2248 |
int result; |
5615f8861 tcp: instrument h... |
2249 |
bool is_cwnd_limited = false, is_rwnd_limited = false; |
605ad7f18 tcp: refine TSO a... |
2250 |
u32 max_segs; |
1da177e4c Linux-2.6.12-rc2 |
2251 |
|
92df7b518 [TCP]: tcp_write_... |
2252 |
sent_pkts = 0; |
5d424d5a6 [TCP]: MTU probing |
2253 |
|
ee1836aec tcp: refresh tp t... |
2254 |
tcp_mstamp_refresh(tp); |
d5dd9175b tcp: use tcp_writ... |
2255 2256 2257 2258 |
if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); if (!result) { |
a2a385d62 tcp: bool convers... |
2259 |
return false; |
d5dd9175b tcp: use tcp_writ... |
2260 2261 2262 |
} else if (result > 0) { sent_pkts = 1; } |
5d424d5a6 [TCP]: MTU probing |
2263 |
} |
ed6e7268b tcp: allow conges... |
2264 |
max_segs = tcp_tso_segs(sk, mss_now); |
fe067e8ab [TCP]: Abstract o... |
2265 |
while ((skb = tcp_send_head(sk))) { |
c8ac37746 [TCP]: Fix bug #5... |
2266 |
unsigned int limit; |
79861919b tcp: fix TCP_REPA... |
2267 2268 2269 2270 |
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); |
bf50b606c tcp: repaired skb... |
2271 |
tcp_init_tso_segs(skb, mss_now); |
79861919b tcp: fix TCP_REPA... |
2272 2273 |
goto repair; /* Skip network transmission */ } |
218af599f tcp: internal imp... |
2274 2275 |
if (tcp_pacing_check(sk)) break; |
5bbb432c8 tcp: tcp_set_skb_... |
2276 |
tso_segs = tcp_init_tso_segs(skb, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
2277 |
BUG_ON(!tso_segs); |
aa93466bd [TCP]: Eliminate ... |
2278 |
|
b68e9f857 [PATCH] tcp: fix ... |
2279 |
cwnd_quota = tcp_cwnd_test(tp, skb); |
6ba8a3b19 tcp: Tail loss pr... |
2280 2281 2282 2283 2284 2285 2286 |
if (!cwnd_quota) { if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; else break; } |
b68e9f857 [PATCH] tcp: fix ... |
2287 |
|
5615f8861 tcp: instrument h... |
2288 2289 |
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) { is_rwnd_limited = true; |
b68e9f857 [PATCH] tcp: fix ... |
2290 |
break; |
5615f8861 tcp: instrument h... |
2291 |
} |
b68e9f857 [PATCH] tcp: fix ... |
2292 |
|
d6a4e26af tcp: tcp_tso_auto... |
2293 |
if (tso_segs == 1) { |
c1b4a7e69 [TCP]: Move to ne... |
2294 2295 2296 2297 2298 |
if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) break; } else { |
ca8a22634 tcp: make cwnd-li... |
2299 |
if (!push_one && |
605ad7f18 tcp: refine TSO a... |
2300 |
tcp_tso_should_defer(sk, skb, &is_cwnd_limited, |
f9bfe4e6a tcp: lack of avai... |
2301 |
&is_rwnd_limited, max_segs)) |
c1b4a7e69 [TCP]: Move to ne... |
2302 2303 |
break; } |
aa93466bd [TCP]: Eliminate ... |
2304 |
|
605ad7f18 tcp: refine TSO a... |
2305 |
limit = mss_now; |
d6a4e26af tcp: tcp_tso_auto... |
2306 |
if (tso_segs > 1 && !tcp_urg_mode(tp)) |
605ad7f18 tcp: refine TSO a... |
2307 2308 2309 2310 2311 2312 2313 |
limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, max_segs), nonagle); if (skb->len > limit && |
564833419 tcp: remove tcp_q... |
2314 |
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) |
605ad7f18 tcp: refine TSO a... |
2315 |
break; |
f9616c35a tcp: implement TS... |
2316 2317 |
if (tcp_small_queue_check(sk, skb, 0)) break; |
c9eeec26e tcp: TSQ can use ... |
2318 |
|
703761d85 tcp: do not send ... |
2319 2320 2321 2322 2323 2324 2325 |
/* Argh, we hit an empty skb(), presumably a thread * is sleeping in sendmsg()/sk_stream_wait_memory(). * We do not want to send a pure-ack packet and have * a strange looking rtx queue with empty packet(s). */ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) break; |
d5dd9175b tcp: use tcp_writ... |
2326 |
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) |
92df7b518 [TCP]: tcp_write_... |
2327 |
break; |
1da177e4c Linux-2.6.12-rc2 |
2328 |
|
ec3423257 tcp: fix retransm... |
2329 |
repair: |
92df7b518 [TCP]: tcp_write_... |
2330 2331 2332 |
/* Advance the send_head. This one is sent out. * This call will increment packets_out. */ |
66f5fe624 [TCP]: Rename upd... |
2333 |
tcp_event_new_data_sent(sk, skb); |
1da177e4c Linux-2.6.12-rc2 |
2334 |
|
92df7b518 [TCP]: tcp_write_... |
2335 |
tcp_minshall_update(tp, mss_now, skb); |
a262f0cdf Proportional Rate... |
2336 |
sent_pkts += tcp_skb_pcount(skb); |
d5dd9175b tcp: use tcp_writ... |
2337 2338 2339 |
if (push_one) break; |
92df7b518 [TCP]: tcp_write_... |
2340 |
} |
1da177e4c Linux-2.6.12-rc2 |
2341 |
|
5615f8861 tcp: instrument h... |
2342 2343 2344 2345 |
if (is_rwnd_limited) tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED); else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); |
aa93466bd [TCP]: Eliminate ... |
2346 |
if (likely(sent_pkts)) { |
684bad110 tcp: use PRR to r... |
2347 2348 |
if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; |
6ba8a3b19 tcp: Tail loss pr... |
2349 2350 2351 |
/* Send one loss probe per tail loss episode. */ if (push_one != 2) |
ed66dfaf2 tcp: when schedul... |
2352 |
tcp_schedule_loss_probe(sk, false); |
d2e1339f4 tcp: Fix CWV bein... |
2353 |
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); |
ca8a22634 tcp: make cwnd-li... |
2354 |
tcp_cwnd_validate(sk, is_cwnd_limited); |
a2a385d62 tcp: bool convers... |
2355 |
return false; |
1da177e4c Linux-2.6.12-rc2 |
2356 |
} |
75c119afe tcp: implement rb... |
2357 |
return !tp->packets_out && !tcp_write_queue_empty(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2358 |
} |
ed66dfaf2 tcp: when schedul... |
2359 |
bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) |
6ba8a3b19 tcp: Tail loss pr... |
2360 2361 2362 |
{ struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); |
a2815817f tcp: enable xmit ... |
2363 |
u32 timeout, rto_delta_us; |
2ae21cf52 tcp: Namespace-if... |
2364 |
int early_retrans; |
6ba8a3b19 tcp: Tail loss pr... |
2365 |
|
6ba8a3b19 tcp: Tail loss pr... |
2366 2367 2368 |
/* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ |
d983ea6f1 tcp: add rcu prot... |
2369 |
if (rcu_access_pointer(tp->fastopen_rsk)) |
6ba8a3b19 tcp: Tail loss pr... |
2370 |
return false; |
2ae21cf52 tcp: Namespace-if... |
2371 |
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; |
6ba8a3b19 tcp: Tail loss pr... |
2372 |
/* Schedule a loss probe in 2*RTT for SACK capable connections |
b4f70c3d4 tcp: allow TLP in... |
2373 |
* not in loss recovery, that are either limited by cwnd or application. |
6ba8a3b19 tcp: Tail loss pr... |
2374 |
*/ |
2ae21cf52 tcp: Namespace-if... |
2375 |
if ((early_retrans != 3 && early_retrans != 4) || |
bec41a11d tcp: remove early... |
2376 |
!tp->packets_out || !tcp_is_sack(tp) || |
b4f70c3d4 tcp: allow TLP in... |
2377 2378 |
(icsk->icsk_ca_state != TCP_CA_Open && icsk->icsk_ca_state != TCP_CA_CWR)) |
6ba8a3b19 tcp: Tail loss pr... |
2379 |
return false; |
bb4d991a2 tcp: adjust tail ... |
2380 |
/* Probe timeout is 2*rtt. Add minimum RTO to account |
f9b995822 tcp: send loss pr... |
2381 2382 |
* for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. |
6ba8a3b19 tcp: Tail loss pr... |
2383 |
*/ |
bb4d991a2 tcp: adjust tail ... |
2384 2385 2386 2387 2388 2389 2390 2391 2392 |
if (tp->srtt_us) { timeout = usecs_to_jiffies(tp->srtt_us >> 2); if (tp->packets_out == 1) timeout += TCP_RTO_MIN; else timeout += TCP_TIMEOUT_MIN; } else { timeout = TCP_TIMEOUT_INIT; } |
6ba8a3b19 tcp: Tail loss pr... |
2393 |
|
a2815817f tcp: enable xmit ... |
2394 |
/* If the RTO formula yields an earlier time, then use that time. */ |
ed66dfaf2 tcp: when schedul... |
2395 2396 2397 |
rto_delta_us = advancing_rto ? jiffies_to_usecs(inet_csk(sk)->icsk_rto) : tcp_rto_delta_us(sk); /* How far in future is RTO? */ |
a2815817f tcp: enable xmit ... |
2398 2399 |
if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); |
6ba8a3b19 tcp: Tail loss pr... |
2400 |
|
3f80e08f4 tcp: add tcp_rese... |
2401 2402 |
tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX, NULL); |
6ba8a3b19 tcp: Tail loss pr... |
2403 2404 |
return true; } |
1f3279ae0 tcp: avoid retran... |
2405 2406 2407 |
/* Thanks to skb fast clones, we can detect if a prior transmit of * a packet is still in a qdisc or driver queue. * In this case, there is very little point doing a retransmit ! |
1f3279ae0 tcp: avoid retran... |
2408 2409 2410 2411 |
*/ static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { |
39bb5e628 net: skb_fclone_b... |
2412 |
if (unlikely(skb_fclone_busy(sk, skb))) { |
c10d9310e tcp: do not assum... |
2413 2414 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); |
1f3279ae0 tcp: avoid retran... |
2415 2416 2417 2418 |
return true; } return false; } |
b340b2645 tcp: TLP retransm... |
2419 |
/* When probe timeout (PTO) fires, try send a new segment if possible, else |
6ba8a3b19 tcp: Tail loss pr... |
2420 2421 2422 2423 |
* retransmit the last segment. */ void tcp_send_loss_probe(struct sock *sk) { |
9b717a8d2 tcp: TLP loss det... |
2424 |
struct tcp_sock *tp = tcp_sk(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2425 2426 2427 |
struct sk_buff *skb; int pcount; int mss = tcp_current_mss(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2428 |
|
b340b2645 tcp: TLP retransm... |
2429 |
skb = tcp_send_head(sk); |
75c119afe tcp: implement rb... |
2430 2431 2432 2433 2434 2435 |
if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); if (tp->packets_out > pcount) goto probe_sent; goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2436 |
} |
75c119afe tcp: implement rb... |
2437 |
skb = skb_rb_last(&sk->tcp_rtx_queue); |
b2b7af861 tcp: fix NULL ref... |
2438 2439 2440 2441 2442 2443 2444 2445 |
if (unlikely(!skb)) { WARN_ONCE(tp->packets_out, "invalid inflight: %u state %u cwnd %u mss %d ", tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); inet_csk(sk)->icsk_pending = 0; return; } |
6ba8a3b19 tcp: Tail loss pr... |
2446 |
|
9b717a8d2 tcp: TLP loss det... |
2447 2448 2449 |
/* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; |
1f3279ae0 tcp: avoid retran... |
2450 2451 |
if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2452 2453 2454 2455 2456 |
pcount = tcp_skb_pcount(skb); if (WARN_ON(!pcount)) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { |
75c119afe tcp: implement rb... |
2457 2458 |
if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, (pcount - 1) * mss, mss, |
6cc55e096 tcp: add gfp para... |
2459 |
GFP_ATOMIC))) |
6ba8a3b19 tcp: Tail loss pr... |
2460 |
goto rearm_timer; |
75c119afe tcp: implement rb... |
2461 |
skb = skb_rb_next(skb); |
6ba8a3b19 tcp: Tail loss pr... |
2462 2463 2464 2465 |
} if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; |
10d3be569 tcp-tso: do not s... |
2466 |
if (__tcp_retransmit_skb(sk, skb, 1)) |
b340b2645 tcp: TLP retransm... |
2467 |
goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2468 |
|
9b717a8d2 tcp: TLP loss det... |
2469 |
/* Record snd_nxt for loss detection. */ |
b340b2645 tcp: TLP retransm... |
2470 |
tp->tlp_high_seq = tp->snd_nxt; |
9b717a8d2 tcp: TLP loss det... |
2471 |
|
b340b2645 tcp: TLP retransm... |
2472 |
probe_sent: |
c10d9310e tcp: do not assum... |
2473 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); |
b340b2645 tcp: TLP retransm... |
2474 2475 |
/* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; |
6ba8a3b19 tcp: Tail loss pr... |
2476 |
rearm_timer: |
fcd16c0a9 tcp: don't extend... |
2477 |
tcp_rearm_rto(sk); |
1da177e4c Linux-2.6.12-rc2 |
2478 |
} |
a762a9800 [TCP]: Kill extra... |
2479 2480 2481 2482 |
/* Push out any pending frames which were held back due to * TCP_CORK or attempt at coalescing tiny packets. * The socket must be locked by the caller. */ |
9e412ba76 [TCP]: Sed magic ... |
2483 2484 |
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) |
a762a9800 [TCP]: Kill extra... |
2485 |
{ |
726e07a8a tcp: move some pa... |
2486 2487 2488 2489 2490 2491 |
/* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and * all will be happy. */ if (unlikely(sk->sk_state == TCP_CLOSE)) return; |
99a1dec70 net: introduce sk... |
2492 |
if (tcp_write_xmit(sk, cur_mss, nonagle, 0, |
7450aaf61 tcp: suppress too... |
2493 |
sk_gfp_mask(sk, GFP_ATOMIC))) |
726e07a8a tcp: move some pa... |
2494 |
tcp_check_probe_timer(sk); |
a762a9800 [TCP]: Kill extra... |
2495 |
} |
c1b4a7e69 [TCP]: Move to ne... |
2496 2497 2498 2499 2500 |
/* Send _single_ skb sitting at the send head. This function requires * true push pending frames to setup probe timer etc. */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { |
fe067e8ab [TCP]: Abstract o... |
2501 |
struct sk_buff *skb = tcp_send_head(sk); |
c1b4a7e69 [TCP]: Move to ne... |
2502 2503 |
BUG_ON(!skb || skb->len < mss_now); |
d5dd9175b tcp: use tcp_writ... |
2504 |
tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); |
c1b4a7e69 [TCP]: Move to ne... |
2505 |
} |
1da177e4c Linux-2.6.12-rc2 |
2506 2507 |
/* This function returns the amount that we can raise the * usable window based on the following constraints |
e905a9eda [NET] IPV4: Fix w... |
2508 |
* |
1da177e4c Linux-2.6.12-rc2 |
2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 |
* 1. The window can never be shrunk once it is offered (RFC 793) * 2. We limit memory per socket * * RFC 1122: * "the suggested [SWS] avoidance algorithm for the receiver is to keep * RECV.NEXT + RCV.WIN fixed until: * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" * * i.e. don't raise the right edge of the window until you can raise * it at least MSS bytes. * * Unfortunately, the recommended algorithm breaks header prediction, * since header prediction assumes th->window stays fixed. * * Strictly speaking, keeping th->window fixed violates the receiver * side SWS prevention criteria. The problem is that under this rule * a stream of single byte packets will cause the right side of the * window to always advance by a single byte. |
e905a9eda [NET] IPV4: Fix w... |
2527 |
* |
1da177e4c Linux-2.6.12-rc2 |
2528 2529 |
* Of course, if the sender implements sender side SWS prevention * then this will not be a problem. |
e905a9eda [NET] IPV4: Fix w... |
2530 |
* |
1da177e4c Linux-2.6.12-rc2 |
2531 |
* BSD seems to make the following compromise: |
e905a9eda [NET] IPV4: Fix w... |
2532 |
* |
1da177e4c Linux-2.6.12-rc2 |
2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 |
* If the free space is less than the 1/4 of the maximum * space available and the free space is less than 1/2 mss, * then set the window to 0. * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] * Otherwise, just prevent the window from shrinking * and from being larger than the largest representable value. * * This prevents incremental opening of the window in the regime * where TCP is limited by the speed of the reader side taking * data out of the TCP receive queue. It does nothing about * those cases where the window is constrained on the sender side * because the pipeline is full. * * BSD also seems to "accidentally" limit itself to windows that are a * multiple of MSS, at least until the free space gets quite small. * This would appear to be a side effect of the mbuf implementation. * Combining these two algorithms results in the observed behavior * of having a fixed window size at almost all times. * * Below we obtain similar behavior by forcing the offered window to * a multiple of the mss when it is feasible to do so. * * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. * Regular options like TIMESTAMP are taken into account. */ u32 __tcp_select_window(struct sock *sk) { |
463c84b97 [NET]: Introduce ... |
2560 |
struct inet_connection_sock *icsk = inet_csk(sk); |
1da177e4c Linux-2.6.12-rc2 |
2561 |
struct tcp_sock *tp = tcp_sk(sk); |
caa20d9ab [TCP]: spelling f... |
2562 |
/* MSS for the peer's data. Previous versions used mss_clamp |
1da177e4c Linux-2.6.12-rc2 |
2563 2564 2565 2566 2567 |
* here. I don't know if the value based on our guesses * of peer's MSS is better for the performance. It's more correct * but may be worse for the performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ |
463c84b97 [NET]: Introduce ... |
2568 |
int mss = icsk->icsk_ack.rcv_mss; |
1da177e4c Linux-2.6.12-rc2 |
2569 |
int free_space = tcp_space(sk); |
86c1a0456 tcp: use zero-win... |
2570 2571 |
int allowed_space = tcp_full_space(sk); int full_space = min_t(int, tp->window_clamp, allowed_space); |
1da177e4c Linux-2.6.12-rc2 |
2572 |
int window; |
06425c308 tcp: fix 0 divide... |
2573 |
if (unlikely(mss > full_space)) { |
e905a9eda [NET] IPV4: Fix w... |
2574 |
mss = full_space; |
06425c308 tcp: fix 0 divide... |
2575 2576 2577 |
if (mss <= 0) return 0; } |
b92edbe0b [TCP] Avoid two d... |
2578 |
if (free_space < (full_space >> 1)) { |
463c84b97 [NET]: Introduce ... |
2579 |
icsk->icsk_ack.quick = 0; |
1da177e4c Linux-2.6.12-rc2 |
2580 |
|
b8da51ebb tcp: introduce tc... |
2581 |
if (tcp_under_memory_pressure(sk)) |
056834d9f [TCP]: cleanup tc... |
2582 2583 |
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
1da177e4c Linux-2.6.12-rc2 |
2584 |
|
86c1a0456 tcp: use zero-win... |
2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 |
/* free_space might become our new window, make sure we don't * increase it due to wscale. */ free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); /* if free space is less than mss estimate, or is below 1/16th * of the maximum allowed, try to move to zero-window, else * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and * new incoming data is dropped due to memory limits. * With large window, mss test triggers way too late in order * to announce zero window in time before rmem limit kicks in. */ if (free_space < (allowed_space >> 4) || free_space < mss) |
1da177e4c Linux-2.6.12-rc2 |
2598 2599 2600 2601 2602 2603 2604 2605 2606 |
return 0; } if (free_space > tp->rcv_ssthresh) free_space = tp->rcv_ssthresh; /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ |
1da177e4c Linux-2.6.12-rc2 |
2607 2608 2609 2610 2611 2612 2613 |
if (tp->rx_opt.rcv_wscale) { window = free_space; /* Advertise enough space so that it won't get scaled away. * Import case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ |
1935299d9 net: tcp: Refine ... |
2614 |
window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); |
1da177e4c Linux-2.6.12-rc2 |
2615 |
} else { |
1935299d9 net: tcp: Refine ... |
2616 |
window = tp->rcv_wnd; |
1da177e4c Linux-2.6.12-rc2 |
2617 2618 2619 2620 2621 2622 2623 2624 2625 |
/* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the * free space we just keep it. This prevents the divide * and multiply from happening most of the time. * We also don't do any window rounding when the free space * is too small. */ if (window <= free_space - mss || window > free_space) |
1935299d9 net: tcp: Refine ... |
2626 |
window = rounddown(free_space, mss); |
84565070e [TCP]: Do receive... |
2627 |
else if (mss == full_space && |
b92edbe0b [TCP] Avoid two d... |
2628 |
free_space > window + (full_space >> 1)) |
84565070e [TCP]: Do receive... |
2629 |
window = free_space; |
1da177e4c Linux-2.6.12-rc2 |
2630 2631 2632 2633 |
} return window; } |
cfea5a688 tcp: Merge tx_fla... |
2634 2635 |
void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) |
082ac2d51 tcp: Merge tx_fla... |
2636 |
{ |
0a2cf20c3 tcp: remove SKBTX... |
2637 2638 2639 |
if (unlikely(tcp_has_tx_tstamp(next_skb))) { const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); |
082ac2d51 tcp: Merge tx_fla... |
2640 |
struct skb_shared_info *shinfo = skb_shinfo(skb); |
0a2cf20c3 tcp: remove SKBTX... |
2641 |
shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; |
082ac2d51 tcp: Merge tx_fla... |
2642 |
shinfo->tskey = next_shinfo->tskey; |
2de8023e7 tcp: Merge txstam... |
2643 2644 |
TCP_SKB_CB(skb)->txstamp_ack |= TCP_SKB_CB(next_skb)->txstamp_ack; |
082ac2d51 tcp: Merge tx_fla... |
2645 2646 |
} } |
4a17fc3ad tcp: collapse mor... |
2647 |
/* Collapses two adjacent SKB's during retransmission. */ |
f8071cde7 tcp: enhance tcp_... |
2648 |
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) |
1da177e4c Linux-2.6.12-rc2 |
2649 2650 |
{ struct tcp_sock *tp = tcp_sk(sk); |
75c119afe tcp: implement rb... |
2651 |
struct sk_buff *next_skb = skb_rb_next(skb); |
13dde04f5 tcp: remove set b... |
2652 |
int next_skb_size; |
1da177e4c Linux-2.6.12-rc2 |
2653 |
|
058dc3342 [TCP]: reduce tcp... |
2654 |
next_skb_size = next_skb->len; |
1da177e4c Linux-2.6.12-rc2 |
2655 |
|
058dc3342 [TCP]: reduce tcp... |
2656 |
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
a6963a6b3 [TCP]: Re-place h... |
2657 |
|
f8071cde7 tcp: enhance tcp_... |
2658 2659 2660 2661 |
if (next_skb_size) { if (next_skb_size <= skb_availroom(skb)) skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), next_skb_size); |
3b4929f65 tcp: limit payloa... |
2662 |
else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size)) |
f8071cde7 tcp: enhance tcp_... |
2663 2664 |
return false; } |
2b7cda9c3 tcp: fix tcp_mtu_... |
2665 |
tcp_highest_sack_replace(sk, next_skb, skb); |
1da177e4c Linux-2.6.12-rc2 |
2666 |
|
058dc3342 [TCP]: reduce tcp... |
2667 2668 |
/* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; |
1da177e4c Linux-2.6.12-rc2 |
2669 |
|
e6c7d0857 tcp: drop unneces... |
2670 |
/* Merge over control information. This moves PSH/FIN etc. over */ |
4de075e04 tcp: rename tcp_s... |
2671 |
TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; |
058dc3342 [TCP]: reduce tcp... |
2672 2673 2674 2675 2676 |
/* All done, get rid of second SKB and account for it so * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; |
a643b5d41 tcp: Handle eor b... |
2677 |
TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; |
058dc3342 [TCP]: reduce tcp... |
2678 2679 |
/* changed transmit queue under us so clear hints */ |
ef9da47c7 tcp: don't clear ... |
2680 2681 2682 |
tcp_clear_retrans_hints_partial(tp); if (next_skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = skb; |
058dc3342 [TCP]: reduce tcp... |
2683 |
|
797108d13 tcp: add helper f... |
2684 |
tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); |
082ac2d51 tcp: Merge tx_fla... |
2685 |
tcp_skb_collapse_tstamp(skb, next_skb); |
75c119afe tcp: implement rb... |
2686 |
tcp_rtx_queue_unlink_and_free(next_skb, sk); |
f8071cde7 tcp: enhance tcp_... |
2687 |
return true; |
1da177e4c Linux-2.6.12-rc2 |
2688 |
} |
67edfef78 TCP: Add comments... |
2689 |
/* Check if coalescing SKBs is legal. */ |
a2a385d62 tcp: bool convers... |
2690 |
static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) |
4a17fc3ad tcp: collapse mor... |
2691 2692 |
{ if (tcp_skb_pcount(skb) > 1) |
a2a385d62 tcp: bool convers... |
2693 |
return false; |
4a17fc3ad tcp: collapse mor... |
2694 |
if (skb_cloned(skb)) |
a2a385d62 tcp: bool convers... |
2695 |
return false; |
2331ccc5b tcp: enhance tcp ... |
2696 |
/* Some heuristics for collapsing over SACK'd could be invented */ |
4a17fc3ad tcp: collapse mor... |
2697 |
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) |
a2a385d62 tcp: bool convers... |
2698 |
return false; |
4a17fc3ad tcp: collapse mor... |
2699 |
|
a2a385d62 tcp: bool convers... |
2700 |
return true; |
4a17fc3ad tcp: collapse mor... |
2701 |
} |
67edfef78 TCP: Add comments... |
2702 2703 2704 |
/* Collapse packets in the retransmit queue to make to create * less packets on the wire. This is only done on retransmission. */ |
4a17fc3ad tcp: collapse mor... |
2705 2706 2707 2708 2709 |
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, int space) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = to, *tmp; |
a2a385d62 tcp: bool convers... |
2710 |
bool first = true; |
4a17fc3ad tcp: collapse mor... |
2711 |
|
e0a1e5b51 tcp: Namespace-if... |
2712 |
if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) |
4a17fc3ad tcp: collapse mor... |
2713 |
return; |
4de075e04 tcp: rename tcp_s... |
2714 |
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) |
4a17fc3ad tcp: collapse mor... |
2715 |
return; |
75c119afe tcp: implement rb... |
2716 |
skb_rbtree_walk_from_safe(skb, tmp) { |
4a17fc3ad tcp: collapse mor... |
2717 2718 |
if (!tcp_can_collapse(sk, skb)) break; |
a643b5d41 tcp: Handle eor b... |
2719 2720 |
if (!tcp_skb_can_collapse_to(to)) break; |
4a17fc3ad tcp: collapse mor... |
2721 2722 2723 |
space -= skb->len; if (first) { |
a2a385d62 tcp: bool convers... |
2724 |
first = false; |
4a17fc3ad tcp: collapse mor... |
2725 2726 2727 2728 2729 |
continue; } if (space < 0) break; |
4a17fc3ad tcp: collapse mor... |
2730 2731 2732 |
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) break; |
f8071cde7 tcp: enhance tcp_... |
2733 2734 |
if (!tcp_collapse_retrans(sk, to)) break; |
4a17fc3ad tcp: collapse mor... |
2735 2736 |
} } |
1da177e4c Linux-2.6.12-rc2 |
2737 2738 2739 2740 |
/* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */ |
10d3be569 tcp-tso: do not s... |
2741 |
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
1da177e4c Linux-2.6.12-rc2 |
2742 |
{ |
5d424d5a6 [TCP]: MTU probing |
2743 |
struct inet_connection_sock *icsk = inet_csk(sk); |
10d3be569 tcp-tso: do not s... |
2744 |
struct tcp_sock *tp = tcp_sk(sk); |
7d227cd23 tcp: TCP connecti... |
2745 |
unsigned int cur_mss; |
10d3be569 tcp-tso: do not s... |
2746 |
int diff, len, err; |
1da177e4c Linux-2.6.12-rc2 |
2747 |
|
10d3be569 tcp-tso: do not s... |
2748 2749 |
/* Inconclusive MTU probe */ if (icsk->icsk_mtup.probe_size) |
5d424d5a6 [TCP]: MTU probing |
2750 |
icsk->icsk_mtup.probe_size = 0; |
5d424d5a6 [TCP]: MTU probing |
2751 |
|
1da177e4c Linux-2.6.12-rc2 |
2752 |
/* Do not sent more than we queued. 1/4 is reserved for possible |
caa20d9ab [TCP]: spelling f... |
2753 |
* copying overhead: fragmentation, tunneling, mangling etc. |
1da177e4c Linux-2.6.12-rc2 |
2754 |
*/ |
14afee4b6 net: convert sock... |
2755 |
if (refcount_read(&sk->sk_wmem_alloc) > |
ffb4d6c85 tcp: fix overflow... |
2756 2757 |
min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) |
1da177e4c Linux-2.6.12-rc2 |
2758 |
return -EAGAIN; |
1f3279ae0 tcp: avoid retran... |
2759 2760 |
if (skb_still_in_host_queue(sk, skb)) return -EBUSY; |
1da177e4c Linux-2.6.12-rc2 |
2761 |
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { |
7f582b248 tcp: purge write ... |
2762 2763 2764 2765 |
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; } |
1da177e4c Linux-2.6.12-rc2 |
2766 2767 2768 |
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } |
7d227cd23 tcp: TCP connecti... |
2769 2770 |
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ |
0c54b85f2 tcp: simplify tcp... |
2771 |
cur_mss = tcp_current_mss(sk); |
7d227cd23 tcp: TCP connecti... |
2772 |
|
1da177e4c Linux-2.6.12-rc2 |
2773 2774 2775 2776 2777 |
/* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ |
9d4fb27db net/ipv4: Move &&... |
2778 2779 |
if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && TCP_SKB_CB(skb)->seq != tp->snd_una) |
1da177e4c Linux-2.6.12-rc2 |
2780 |
return -EAGAIN; |
10d3be569 tcp-tso: do not s... |
2781 2782 |
len = cur_mss * segs; if (skb->len > len) { |
75c119afe tcp: implement rb... |
2783 2784 |
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, cur_mss, GFP_ATOMIC)) |
1da177e4c Linux-2.6.12-rc2 |
2785 |
return -ENOMEM; /* We'll try again later. */ |
02276f3c9 tcp: fix corner c... |
2786 |
} else { |
10d3be569 tcp-tso: do not s... |
2787 2788 |
if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; |
9eb9362e5 tcp: miscounts du... |
2789 |
|
10d3be569 tcp-tso: do not s... |
2790 2791 2792 2793 2794 2795 2796 |
diff = tcp_skb_pcount(skb); tcp_set_skb_tso_segs(skb, cur_mss); diff -= tcp_skb_pcount(skb); if (diff) tcp_adjust_pcount(sk, skb, diff); if (skb->len < cur_mss) tcp_retrans_try_collapse(sk, skb, cur_mss); |
1da177e4c Linux-2.6.12-rc2 |
2797 |
} |
492135557 tcp: add rfc3168,... |
2798 2799 2800 |
/* RFC3168, section 6.1.1.1. ECN fallback */ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) tcp_ecn_clear_syn(sk, skb); |
678550c65 tcp: include loca... |
2801 2802 2803 2804 2805 2806 |
/* Update global and local TCP statistics. */ segs = tcp_skb_pcount(skb); TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); tp->total_retrans += segs; |
fb31c9b9f tcp: add data byt... |
2807 |
tp->bytes_retrans += skb->len; |
678550c65 tcp: include loca... |
2808 |
|
50bceae9b tcp: Reallocate h... |
2809 2810 2811 2812 2813 2814 |
/* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom * beyond what csum_start can cover. */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { |
10a81980f tcp: refresh skb ... |
2815 |
struct sk_buff *nskb; |
e2080072e tcp: new list for... |
2816 2817 2818 2819 2820 |
tcp_skb_tsorted_save(skb) { nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; } tcp_skb_tsorted_restore(skb); |
5889e2c0e tcp: call tcp_rat... |
2821 |
if (!err) { |
a7a256306 tcp: mitigate sch... |
2822 |
tcp_update_skb_after_send(sk, skb, tp->tcp_wstamp_ns); |
5889e2c0e tcp: call tcp_rat... |
2823 2824 |
tcp_rate_skb_sent(sk, skb); } |
117632e64 tcp: take care of... |
2825 |
} else { |
c84a57113 tcp: fix bogus RT... |
2826 |
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
117632e64 tcp: take care of... |
2827 |
} |
c84a57113 tcp: fix bogus RT... |
2828 |
|
7f12422c4 tcp: always times... |
2829 2830 2831 2832 |
/* To avoid taking spuriously low RTT samples based on a timestamp * for a transmit that never happened, always mark EVER_RETRANS */ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; |
a31ad29e6 bpf: Add BPF_SOCK... |
2833 2834 2835 |
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG)) tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB, TCP_SKB_CB(skb)->seq, segs, err); |
fc9f35010 tcp: increment re... |
2836 |
if (likely(!err)) { |
e086101b1 tcp: add a tracep... |
2837 |
trace_tcp_retransmit_skb(sk, skb); |
678550c65 tcp: include loca... |
2838 |
} else if (err != -EBUSY) { |
ec641b394 tcp: fix SNMP und... |
2839 |
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); |
fc9f35010 tcp: increment re... |
2840 |
} |
c84a57113 tcp: fix bogus RT... |
2841 |
return err; |
93b174ad7 tcp: bug fix Fast... |
2842 |
} |
10d3be569 tcp-tso: do not s... |
2843 |
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
93b174ad7 tcp: bug fix Fast... |
2844 2845 |
{ struct tcp_sock *tp = tcp_sk(sk); |
10d3be569 tcp-tso: do not s... |
2846 |
int err = __tcp_retransmit_skb(sk, skb, segs); |
1da177e4c Linux-2.6.12-rc2 |
2847 2848 |
if (err == 0) { |
1da177e4c Linux-2.6.12-rc2 |
2849 |
#if FASTRETRANS_DEBUG > 0 |
056834d9f [TCP]: cleanup tc... |
2850 |
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
e87cc4728 net: Convert net_... |
2851 2852 |
net_dbg_ratelimited("retrans_out leaked "); |
1da177e4c Linux-2.6.12-rc2 |
2853 2854 2855 2856 |
} #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); |
1da177e4c Linux-2.6.12-rc2 |
2857 |
} |
6e08d5e3c tcp: fix false un... |
2858 |
|
7ae189759 tcp: always set r... |
2859 2860 2861 |
/* Save stamp of the first (attempted) retransmit. */ if (!tp->retrans_stamp) tp->retrans_stamp = tcp_skb_timestamp(skb); |
6e08d5e3c tcp: fix false un... |
2862 2863 2864 |
if (tp->undo_retrans < 0) tp->undo_retrans = 0; tp->undo_retrans += tcp_skb_pcount(skb); |
1da177e4c Linux-2.6.12-rc2 |
2865 2866 2867 2868 2869 2870 2871 |
return err; } /* This gets called after a retransmit timeout, and the initially * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. |
1da177e4c Linux-2.6.12-rc2 |
2872 2873 2874 |
*/ void tcp_xmit_retransmit_queue(struct sock *sk) { |
6687e988d [ICSK]: Move TCP ... |
2875 |
const struct inet_connection_sock *icsk = inet_csk(sk); |
b9f1f1ce8 tcp: fix tcp_xmit... |
2876 |
struct sk_buff *skb, *rtx_head, *hole = NULL; |
1da177e4c Linux-2.6.12-rc2 |
2877 |
struct tcp_sock *tp = tcp_sk(sk); |
840a3cbe8 tcp: remove forwa... |
2878 |
u32 max_segs; |
61eb55f4d tcp: Reorganize s... |
2879 |
int mib_idx; |
6a438bbe6 [TCP]: speed up S... |
2880 |
|
45e77d314 tcp: fix crash in... |
2881 2882 |
if (!tp->packets_out) return; |
b9f1f1ce8 tcp: fix tcp_xmit... |
2883 2884 |
rtx_head = tcp_rtx_queue_head(sk); skb = tp->retransmit_skb_hint ?: rtx_head; |
ed6e7268b tcp: allow conges... |
2885 |
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); |
75c119afe tcp: implement rb... |
2886 |
skb_rbtree_walk_from(skb) { |
dca0aaf84 tcp: defer sacked... |
2887 |
__u8 sacked; |
10d3be569 tcp-tso: do not s... |
2888 |
int segs; |
1da177e4c Linux-2.6.12-rc2 |
2889 |
|
218af599f tcp: internal imp... |
2890 2891 |
if (tcp_pacing_check(sk)) break; |
08ebd1721 tcp: remove tp->l... |
2892 |
/* we could do better than to assign each time */ |
51456b291 ipv4: coding styl... |
2893 |
if (!hole) |
0e1c54c2a tcp: reorganize r... |
2894 |
tp->retransmit_skb_hint = skb; |
08ebd1721 tcp: remove tp->l... |
2895 |
|
10d3be569 tcp-tso: do not s... |
2896 2897 |
segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) |
08ebd1721 tcp: remove tp->l... |
2898 |
return; |
dca0aaf84 tcp: defer sacked... |
2899 |
sacked = TCP_SKB_CB(skb)->sacked; |
a3d2e9f8e tcp: do not send ... |
2900 2901 2902 2903 |
/* In case tcp_shift_skb_data() have aggregated large skbs, * we need to make sure not sending too bigs TSO packets */ segs = min_t(int, segs, max_segs); |
1da177e4c Linux-2.6.12-rc2 |
2904 |
|
840a3cbe8 tcp: remove forwa... |
2905 2906 |
if (tp->retrans_out >= tp->lost_out) { break; |
0e1c54c2a tcp: reorganize r... |
2907 |
} else if (!(sacked & TCPCB_LOST)) { |
51456b291 ipv4: coding styl... |
2908 |
if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) |
0e1c54c2a tcp: reorganize r... |
2909 2910 |
hole = skb; continue; |
1da177e4c Linux-2.6.12-rc2 |
2911 |
|
0e1c54c2a tcp: reorganize r... |
2912 2913 2914 2915 2916 2917 |
} else { if (icsk->icsk_ca_state != TCP_CA_Loss) mib_idx = LINUX_MIB_TCPFASTRETRANS; else mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; } |
1da177e4c Linux-2.6.12-rc2 |
2918 |
|
0e1c54c2a tcp: reorganize r... |
2919 |
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) |
1da177e4c Linux-2.6.12-rc2 |
2920 |
continue; |
f9616c35a tcp: implement TS... |
2921 2922 |
if (tcp_small_queue_check(sk, skb, 1)) return; |
10d3be569 tcp-tso: do not s... |
2923 |
if (tcp_retransmit_skb(sk, skb, segs)) |
0e1c54c2a tcp: reorganize r... |
2924 |
return; |
24ab6bec8 tcp: account all ... |
2925 |
|
de1d65781 tcp: fix under-ac... |
2926 |
NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); |
1da177e4c Linux-2.6.12-rc2 |
2927 |
|
684bad110 tcp: use PRR to r... |
2928 |
if (tcp_in_cwnd_reduction(sk)) |
a262f0cdf Proportional Rate... |
2929 |
tp->prr_out += tcp_skb_pcount(skb); |
75c119afe tcp: implement rb... |
2930 |
if (skb == rtx_head && |
57dde7f70 tcp: add reorderi... |
2931 |
icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) |
3f80e08f4 tcp: add tcp_rese... |
2932 2933 2934 2935 |
tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX, skb); |
1da177e4c Linux-2.6.12-rc2 |
2936 2937 |
} } |
d83769a58 tcp: fix possible... |
2938 2939 |
/* We allow to exceed memory limits for FIN packets to expedite * connection tear down and (memory) recovery. |
845704a53 tcp: avoid loopin... |
2940 2941 |
* Otherwise tcp_send_fin() could be tempted to either delay FIN * or even be forced to close flow without any FIN. |
a6c5ea4cc tcp: rename sk_fo... |
2942 2943 |
* In general, we want to allow one skb per socket to avoid hangs * with edge trigger epoll() |
d83769a58 tcp: fix possible... |
2944 |
*/ |
a6c5ea4cc tcp: rename sk_fo... |
2945 |
void sk_forced_mem_schedule(struct sock *sk, int size) |
d83769a58 tcp: fix possible... |
2946 |
{ |
e805605c7 net: tcp_memcontr... |
2947 |
int amt; |
d83769a58 tcp: fix possible... |
2948 2949 2950 2951 2952 |
if (size <= sk->sk_forward_alloc) return; amt = sk_mem_pages(size); sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
e805605c7 net: tcp_memcontr... |
2953 |
sk_memory_allocated_add(sk, amt); |
baac50bbc net: tcp_memcontr... |
2954 2955 |
if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_charge_skmem(sk->sk_memcg, amt); |
d83769a58 tcp: fix possible... |
2956 |
} |
845704a53 tcp: avoid loopin... |
2957 2958 |
/* Send a FIN. The caller locks the socket for us. * We should try to send a FIN packet really hard, but eventually give up. |
1da177e4c Linux-2.6.12-rc2 |
2959 2960 2961 |
*/ void tcp_send_fin(struct sock *sk) { |
845704a53 tcp: avoid loopin... |
2962 |
struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); |
e905a9eda [NET] IPV4: Fix w... |
2963 |
struct tcp_sock *tp = tcp_sk(sk); |
e905a9eda [NET] IPV4: Fix w... |
2964 |
|
845704a53 tcp: avoid loopin... |
2965 2966 2967 2968 |
/* Optimization, tack on the FIN if we have one skb in write queue and * this skb was not yet sent, or we are under memory pressure. * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. |
1da177e4c Linux-2.6.12-rc2 |
2969 |
*/ |
75c119afe tcp: implement rb... |
2970 2971 2972 2973 |
if (!tskb && tcp_under_memory_pressure(sk)) tskb = skb_rb_last(&sk->tcp_rtx_queue); if (tskb) { |
845704a53 tcp: avoid loopin... |
2974 2975 |
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; |
1da177e4c Linux-2.6.12-rc2 |
2976 |
tp->write_seq++; |
75c119afe tcp: implement rb... |
2977 |
if (tcp_write_queue_empty(sk)) { |
845704a53 tcp: avoid loopin... |
2978 2979 2980 2981 2982 2983 |
/* This means tskb was already sent. * Pretend we included the FIN on previous transmit. * We need to set tp->snd_nxt to the value it would have * if FIN had been sent. This is because retransmit path * does not change tp->snd_nxt. */ |
e0d694d63 tcp: annotate tp-... |
2984 |
WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1); |
845704a53 tcp: avoid loopin... |
2985 2986 |
return; } |
1da177e4c Linux-2.6.12-rc2 |
2987 |
} else { |
845704a53 tcp: avoid loopin... |
2988 |
skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); |
d1edc0855 tcp: remove redun... |
2989 |
if (unlikely(!skb)) |
845704a53 tcp: avoid loopin... |
2990 |
return; |
d1edc0855 tcp: remove redun... |
2991 |
|
e2080072e tcp: new list for... |
2992 |
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); |
d83769a58 tcp: fix possible... |
2993 |
skb_reserve(skb, MAX_TCP_HEADER); |
a6c5ea4cc tcp: rename sk_fo... |
2994 |
sk_forced_mem_schedule(sk, skb->truesize); |
1da177e4c Linux-2.6.12-rc2 |
2995 |
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ |
e870a8efc [TCP]: Perform se... |
2996 |
tcp_init_nondata_skb(skb, tp->write_seq, |
a3433f35a tcp: unify tcp fl... |
2997 |
TCPHDR_ACK | TCPHDR_FIN); |
1da177e4c Linux-2.6.12-rc2 |
2998 2999 |
tcp_queue_skb(sk, skb); } |
845704a53 tcp: avoid loopin... |
3000 |
__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); |
1da177e4c Linux-2.6.12-rc2 |
3001 3002 3003 3004 3005 |
} /* We get here when a process closes a file descriptor (either due to * an explicit close() or as a byproduct of exit()'ing) and there * was unread data in the receive queue. This behavior is recommended |
65bb723c9 [TCP]: Update ref... |
3006 |
* by RFC 2525, section 2.17. -DaveM |
1da177e4c Linux-2.6.12-rc2 |
3007 |
*/ |
dd0fc66fb [PATCH] gfp flags... |
3008 |
void tcp_send_active_reset(struct sock *sk, gfp_t priority) |
1da177e4c Linux-2.6.12-rc2 |
3009 |
{ |
1da177e4c Linux-2.6.12-rc2 |
3010 |
struct sk_buff *skb; |
7cc2b043b net: tcp: Increas... |
3011 |
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); |
1da177e4c Linux-2.6.12-rc2 |
3012 3013 3014 |
/* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { |
4e6734447 mib: add net to N... |
3015 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
1da177e4c Linux-2.6.12-rc2 |
3016 3017 3018 3019 3020 |
return; } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); |
e870a8efc [TCP]: Perform se... |
3021 |
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
a3433f35a tcp: unify tcp fl... |
3022 |
TCPHDR_ACK | TCPHDR_RST); |
9a568de48 tcp: switch TCP T... |
3023 |
tcp_mstamp_refresh(tcp_sk(sk)); |
1da177e4c Linux-2.6.12-rc2 |
3024 |
/* Send it off. */ |
dfb4b9dce [TCP] Vegas: time... |
3025 |
if (tcp_transmit_skb(sk, skb, 0, priority)) |
4e6734447 mib: add net to N... |
3026 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
c24b14c46 tcp: add tracepoi... |
3027 3028 3029 3030 3031 |
/* skb of trace_tcp_send_reset() keeps the skb that caused RST, * skb here is different to the troublesome skb, so use NULL */ trace_tcp_send_reset(sk, NULL); |
1da177e4c Linux-2.6.12-rc2 |
3032 |
} |
67edfef78 TCP: Add comments... |
3033 3034 |
/* Send a crossed SYN-ACK during socket establishment. * WARNING: This routine must only be called when we have already sent |
1da177e4c Linux-2.6.12-rc2 |
3035 3036 3037 3038 3039 3040 |
* a SYN packet that crossed the incoming SYN that caused this routine * to get called. If this assumption fails then the initial rcv_wnd * and rcv_wscale values will not be correct. */ int tcp_send_synack(struct sock *sk) { |
056834d9f [TCP]: cleanup tc... |
3041 |
struct sk_buff *skb; |
1da177e4c Linux-2.6.12-rc2 |
3042 |
|
75c119afe tcp: implement rb... |
3043 |
skb = tcp_rtx_queue_head(sk); |
51456b291 ipv4: coding styl... |
3044 |
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { |
75c119afe tcp: implement rb... |
3045 3046 |
pr_err("%s: wrong queue state ", __func__); |
1da177e4c Linux-2.6.12-rc2 |
3047 3048 |
return -EFAULT; } |
4de075e04 tcp: rename tcp_s... |
3049 |
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { |
1da177e4c Linux-2.6.12-rc2 |
3050 |
if (skb_cloned(skb)) { |
e2080072e tcp: new list for... |
3051 3052 3053 3054 3055 |
struct sk_buff *nskb; tcp_skb_tsorted_save(skb) { nskb = skb_copy(skb, GFP_ATOMIC); } tcp_skb_tsorted_restore(skb); |
51456b291 ipv4: coding styl... |
3056 |
if (!nskb) |
1da177e4c Linux-2.6.12-rc2 |
3057 |
return -ENOMEM; |
e2080072e tcp: new list for... |
3058 |
INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); |
69486bfa0 tcp: do not leave... |
3059 |
tcp_highest_sack_replace(sk, skb, nskb); |
75c119afe tcp: implement rb... |
3060 |
tcp_rtx_queue_unlink_and_free(skb, sk); |
f4a775d14 net: introduce __... |
3061 |
__skb_header_release(nskb); |
75c119afe tcp: implement rb... |
3062 |
tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); |
ab4e846a8 tcp: annotate sk-... |
3063 |
sk_wmem_queued_add(sk, nskb->truesize); |
3ab224be6 [NET] CORE: Intro... |
3064 |
sk_mem_charge(sk, nskb->truesize); |
1da177e4c Linux-2.6.12-rc2 |
3065 3066 |
skb = nskb; } |
4de075e04 tcp: rename tcp_s... |
3067 |
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; |
735d38311 tcp: change TCP_E... |
3068 |
tcp_ecn_send_synack(sk, skb); |
1da177e4c Linux-2.6.12-rc2 |
3069 |
} |
dfb4b9dce [TCP] Vegas: time... |
3070 |
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
1da177e4c Linux-2.6.12-rc2 |
3071 |
} |
4aea39c11 tcp: tcp_make_syn... |
3072 3073 3074 3075 3076 |
/** * tcp_make_synack - Prepare a SYN-ACK. * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer |
4aea39c11 tcp: tcp_make_syn... |
3077 3078 3079 3080 |
* * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ |
5d062de7f tcp: constify tcp... |
3081 |
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, |
e6b4d1136 TCPCT part 1a: ad... |
3082 |
struct request_sock *req, |
ca6fb0651 tcp: attach SYNAC... |
3083 |
struct tcp_fastopen_cookie *foc, |
b3d051477 tcp: do not mess ... |
3084 |
enum tcp_synack_type synack_type) |
1da177e4c Linux-2.6.12-rc2 |
3085 |
{ |
2e6599cb8 [NET] Generalise ... |
3086 |
struct inet_request_sock *ireq = inet_rsk(req); |
5d062de7f tcp: constify tcp... |
3087 |
const struct tcp_sock *tp = tcp_sk(sk); |
80f03e27a tcp: md5: fix rcu... |
3088 |
struct tcp_md5sig_key *md5 = NULL; |
5d062de7f tcp: constify tcp... |
3089 3090 |
struct tcp_out_options opts; struct sk_buff *skb; |
bd0388ae7 TCPCT part 1f: In... |
3091 |
int tcp_header_size; |
5d062de7f tcp: constify tcp... |
3092 |
struct tcphdr *th; |
f5fff5dc8 tcp: advertise MS... |
3093 |
int mss; |
a842fe142 tcp: add optional... |
3094 |
u64 now; |
1da177e4c Linux-2.6.12-rc2 |
3095 |
|
ca6fb0651 tcp: attach SYNAC... |
3096 |
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); |
4aea39c11 tcp: tcp_make_syn... |
3097 3098 |
if (unlikely(!skb)) { dst_release(dst); |
1da177e4c Linux-2.6.12-rc2 |
3099 |
return NULL; |
4aea39c11 tcp: tcp_make_syn... |
3100 |
} |
1da177e4c Linux-2.6.12-rc2 |
3101 3102 |
/* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); |
b3d051477 tcp: do not mess ... |
3103 3104 |
switch (synack_type) { case TCP_SYNACK_NORMAL: |
9e17f8a47 net: make skb_set... |
3105 |
skb_set_owner_w(skb, req_to_sk(req)); |
b3d051477 tcp: do not mess ... |
3106 3107 3108 3109 3110 3111 3112 |
break; case TCP_SYNACK_COOKIE: /* Under synflood, we do not attach skb to a socket, * to avoid false sharing. */ break; case TCP_SYNACK_FASTOPEN: |
ca6fb0651 tcp: attach SYNAC... |
3113 3114 3115 3116 3117 |
/* sk is a const pointer, because we want to express multiple * cpu might call us concurrently. * sk->sk_wmem_alloc in an atomic, we can promote to rw. */ skb_set_owner_w(skb, (struct sock *)sk); |
b3d051477 tcp: do not mess ... |
3118 |
break; |
ca6fb0651 tcp: attach SYNAC... |
3119 |
} |
4aea39c11 tcp: tcp_make_syn... |
3120 |
skb_dst_set(skb, dst); |
1da177e4c Linux-2.6.12-rc2 |
3121 |
|
3541f9e8b tcp: add tcp_mss_... |
3122 |
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); |
f5fff5dc8 tcp: advertise MS... |
3123 |
|
33ad798c9 tcp: options clea... |
3124 |
memset(&opts, 0, sizeof(opts)); |
a842fe142 tcp: add optional... |
3125 |
now = tcp_clock_ns(); |
8b5f12d04 syncookies: fix i... |
3126 3127 |
#ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) |
d3edd06ea tcp: provide earl... |
3128 |
skb->skb_mstamp_ns = cookie_init_timestamp(req); |
8b5f12d04 syncookies: fix i... |
3129 3130 |
else #endif |
9e450c1ec tcp: better SYNAC... |
3131 |
{ |
a842fe142 tcp: add optional... |
3132 |
skb->skb_mstamp_ns = now; |
9e450c1ec tcp: better SYNAC... |
3133 3134 3135 |
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } |
80f03e27a tcp: md5: fix rcu... |
3136 3137 3138 |
#ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); |
fd3a154a0 tcp: md5: get rid... |
3139 |
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); |
80f03e27a tcp: md5: fix rcu... |
3140 |
#endif |
58d607d3e tcp: provide skb-... |
3141 |
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); |
60e2a7780 tcp: TCP experime... |
3142 3143 |
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, foc) + sizeof(*th); |
cfb6eeb4c [TCP]: MD5 Signat... |
3144 |
|
aa8223c7b [SK_BUFF]: Introd... |
3145 3146 |
skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); |
1da177e4c Linux-2.6.12-rc2 |
3147 |
|
ea1627c20 tcp: minor optimi... |
3148 |
th = (struct tcphdr *)skb->data; |
1da177e4c Linux-2.6.12-rc2 |
3149 3150 3151 |
memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; |
6ac705b18 tcp: remove tcp_e... |
3152 |
tcp_ecn_make_synack(req, th); |
b44084c2c inet: rename ir_l... |
3153 |
th->source = htons(ireq->ir_num); |
634fb979e inet: includes a ... |
3154 |
th->dest = ireq->ir_rmt_port; |
e05a90ec9 net: reflect mark... |
3155 |
skb->mark = ireq->ir_mark; |
3b1177503 tcp: do not mangl... |
3156 3157 |
skb->ip_summed = CHECKSUM_PARTIAL; th->seq = htonl(tcp_rsk(req)->snt_isn); |
8336886f7 tcp: TCP Fast Ope... |
3158 3159 |
/* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); |
1da177e4c Linux-2.6.12-rc2 |
3160 3161 |
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
ed53d0ab7 net: shrink struc... |
3162 |
th->window = htons(min(req->rsk_rcv_wnd, 65535U)); |
5d062de7f tcp: constify tcp... |
3163 |
tcp_options_write((__be32 *)(th + 1), NULL, &opts); |
1da177e4c Linux-2.6.12-rc2 |
3164 |
th->doff = (tcp_header_size >> 2); |
90bbcc608 net: tcp: rename ... |
3165 |
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
cfb6eeb4c [TCP]: MD5 Signat... |
3166 3167 3168 |
#ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ |
80f03e27a tcp: md5: fix rcu... |
3169 |
if (md5) |
bd0388ae7 TCPCT part 1f: In... |
3170 |
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, |
39f8e58e5 tcp: md5: remove ... |
3171 |
md5, req_to_sk(req), skb); |
80f03e27a tcp: md5: fix rcu... |
3172 |
rcu_read_unlock(); |
cfb6eeb4c [TCP]: MD5 Signat... |
3173 |
#endif |
a842fe142 tcp: add optional... |
3174 3175 |
skb->skb_mstamp_ns = now; tcp_add_tx_delay(skb, tp); |
1da177e4c Linux-2.6.12-rc2 |
3176 3177 |
return skb; } |
4bc2f18ba net/ipv4: EXPORT_... |
3178 |
EXPORT_SYMBOL(tcp_make_synack); |
1da177e4c Linux-2.6.12-rc2 |
3179 |
|
81164413a net: tcp: add per... |
3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 |
static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); if (ca_key == TCP_CA_UNSPEC) return; rcu_read_lock(); ca = tcp_ca_find_key(ca_key); if (likely(ca && try_module_get(ca->owner))) { module_put(icsk->icsk_ca_ops->owner); icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); icsk->icsk_ca_ops = ca; } rcu_read_unlock(); } |
67edfef78 TCP: Add comments... |
3198 |
/* Do all connect socket setups that can be done AF independent. */ |
f7e56a76a tcp: make local f... |
3199 |
static void tcp_connect_init(struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
3200 |
{ |
cf533ea53 tcp: add const qu... |
3201 |
const struct dst_entry *dst = __sk_dst_get(sk); |
1da177e4c Linux-2.6.12-rc2 |
3202 3203 |
struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; |
13d3b1ebe bpf: Support for ... |
3204 |
u32 rcv_wnd; |
1da177e4c Linux-2.6.12-rc2 |
3205 3206 3207 3208 |
/* We'll fix this up when we get a response from the other end. * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ |
5d2ed0521 tcp: Namespaceify... |
3209 3210 3211 |
tp->tcp_header_len = sizeof(struct tcphdr); if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; |
1da177e4c Linux-2.6.12-rc2 |
3212 |
|
cfb6eeb4c [TCP]: MD5 Signat... |
3213 |
#ifdef CONFIG_TCP_MD5SIG |
00db41243 ipv4: coding styl... |
3214 |
if (tp->af_specific->md5_lookup(sk, sk)) |
cfb6eeb4c [TCP]: MD5 Signat... |
3215 3216 |
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif |
1da177e4c Linux-2.6.12-rc2 |
3217 3218 3219 3220 |
/* If user gave his TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; tp->max_window = 0; |
5d424d5a6 [TCP]: MTU probing |
3221 |
tcp_mtup_init(sk); |
1da177e4c Linux-2.6.12-rc2 |
3222 |
tcp_sync_mss(sk, dst_mtu(dst)); |
81164413a net: tcp: add per... |
3223 |
tcp_ca_dst_init(sk, dst); |
1da177e4c Linux-2.6.12-rc2 |
3224 3225 |
if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); |
3541f9e8b tcp: add tcp_mss_... |
3226 |
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); |
f5fff5dc8 tcp: advertise MS... |
3227 |
|
1da177e4c Linux-2.6.12-rc2 |
3228 |
tcp_initialize_rcv_mss(sk); |
1da177e4c Linux-2.6.12-rc2 |
3229 |
|
e88c64f0a tcp: allow effect... |
3230 3231 3232 3233 |
/* limit the window selection if the user enforce a smaller rx buffer */ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) tp->window_clamp = tcp_full_space(sk); |
13d3b1ebe bpf: Support for ... |
3234 3235 3236 |
rcv_wnd = tcp_rwnd_init_bpf(sk); if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); |
ceef9ab6b tcp: Namespace-if... |
3237 |
tcp_select_initial_window(sk, tcp_full_space(sk), |
1da177e4c Linux-2.6.12-rc2 |
3238 3239 3240 |
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, |
9bb37ef00 tcp: Namespaceify... |
3241 |
sock_net(sk)->ipv4.sysctl_tcp_window_scaling, |
31d12926e net: Add rtnetlin... |
3242 |
&rcv_wscale, |
13d3b1ebe bpf: Support for ... |
3243 |
rcv_wnd); |
1da177e4c Linux-2.6.12-rc2 |
3244 3245 3246 3247 3248 3249 3250 |
tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; sk->sk_err = 0; sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; |
ee7537b63 tcp: tcp_init_wl ... |
3251 |
tcp_init_wl(tp, 0); |
7f582b248 tcp: purge write ... |
3252 |
tcp_write_queue_purge(sk); |
1da177e4c Linux-2.6.12-rc2 |
3253 3254 |
tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; |
33f5f57ee tcp: kill pointle... |
3255 |
tp->snd_up = tp->write_seq; |
e0d694d63 tcp: annotate tp-... |
3256 |
WRITE_ONCE(tp->snd_nxt, tp->write_seq); |
ee9952831 tcp: Initial repa... |
3257 3258 3259 |
if (likely(!tp->repair)) tp->rcv_nxt = 0; |
c7781a6e3 tcp: initialize r... |
3260 |
else |
70eabf0e1 tcp: use tcp_jiff... |
3261 |
tp->rcv_tstamp = tcp_jiffies32; |
ee9952831 tcp: Initial repa... |
3262 |
tp->rcv_wup = tp->rcv_nxt; |
7db48e983 tcp: annotate tp-... |
3263 |
WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); |
1da177e4c Linux-2.6.12-rc2 |
3264 |
|
8550f328f bpf: Support for ... |
3265 |
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); |
463c84b97 [NET]: Introduce ... |
3266 |
inet_csk(sk)->icsk_retransmits = 0; |
1da177e4c Linux-2.6.12-rc2 |
3267 3268 |
tcp_clear_retrans(tp); } |
783237e8d net-tcp: Fast Ope... |
3269 3270 3271 3272 3273 3274 |
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); tcb->end_seq += skb->len; |
f4a775d14 net: introduce __... |
3275 |
__skb_header_release(skb); |
ab4e846a8 tcp: annotate sk-... |
3276 |
sk_wmem_queued_add(sk, skb->truesize); |
783237e8d net-tcp: Fast Ope... |
3277 |
sk_mem_charge(sk, skb->truesize); |
0f3174645 tcp: annotate tp-... |
3278 |
WRITE_ONCE(tp->write_seq, tcb->end_seq); |
783237e8d net-tcp: Fast Ope... |
3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 |
tp->packets_out += tcp_skb_pcount(skb); } /* Build and send a SYN with data and (cached) Fast Open cookie. However, * queue a data-only packet after the regular SYN, such that regular SYNs * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges * only the SYN sequence, the data are retransmitted in the first ACK. * If cookie is not cached or other error occurs, falls back to send a * regular SYN with Fast Open cookie request option. */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; |
065263f40 net/tcp-fastopen:... |
3293 |
int space, err = 0; |
355a901e6 tcp: make connect... |
3294 |
struct sk_buff *syn_data; |
aab487435 net-tcp: Fast Ope... |
3295 |
|
67da22d23 net-tcp: Fast Ope... |
3296 |
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ |
065263f40 net/tcp-fastopen:... |
3297 |
if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) |
783237e8d net-tcp: Fast Ope... |
3298 3299 3300 3301 3302 3303 |
goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and * user-MSS. Reserve maximum option space for middleboxes that add * private TCP options. The cost is reduced data space in SYN :( */ |
3541f9e8b tcp: add tcp_mss_... |
3304 |
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); |
1b63edd6e tcp: fix SYN-data... |
3305 |
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - |
783237e8d net-tcp: Fast Ope... |
3306 |
MAX_TCP_OPTION_SPACE; |
f5ddcbbb4 net-tcp: fastopen... |
3307 3308 3309 3310 |
space = min_t(size_t, space, fo->size); /* limit to order-0 allocations */ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); |
eb9344781 tcp: add a force_... |
3311 |
syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false); |
355a901e6 tcp: make connect... |
3312 |
if (!syn_data) |
783237e8d net-tcp: Fast Ope... |
3313 |
goto fallback; |
355a901e6 tcp: make connect... |
3314 3315 |
syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); |
07e100f98 tcp: restore fast... |
3316 3317 3318 3319 |
if (space) { int copied = copy_from_iter(skb_put(syn_data, space), space, &fo->data->msg_iter); if (unlikely(!copied)) { |
ba233b347 tcp: fix tcp_send... |
3320 |
tcp_skb_tsorted_anchor_cleanup(syn_data); |
07e100f98 tcp: restore fast... |
3321 3322 3323 3324 3325 3326 3327 |
kfree_skb(syn_data); goto fallback; } if (copied != space) { skb_trim(syn_data, copied); space = copied; } |
f859a4484 tcp: allow zeroco... |
3328 |
skb_zcopy_set(syn_data, fo->uarg, NULL); |
57be5bdad ip: convert tcp_s... |
3329 |
} |
355a901e6 tcp: make connect... |
3330 3331 3332 3333 |
/* No more data pending in inet_wait_for_connect() */ if (space == fo->size) fo->data = NULL; fo->copied = space; |
783237e8d net-tcp: Fast Ope... |
3334 |
|
355a901e6 tcp: make connect... |
3335 |
tcp_connect_queue_skb(sk, syn_data); |
0f87230d1 tcp: instrument h... |
3336 3337 |
if (syn_data->len) tcp_chrono_start(sk, TCP_CHRONO_BUSY); |
783237e8d net-tcp: Fast Ope... |
3338 |
|
355a901e6 tcp: make connect... |
3339 |
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |
783237e8d net-tcp: Fast Ope... |
3340 |
|
d3edd06ea tcp: provide earl... |
3341 |
syn->skb_mstamp_ns = syn_data->skb_mstamp_ns; |
431a91242 tcp: timestamp SY... |
3342 |
|
355a901e6 tcp: make connect... |
3343 3344 3345 3346 3347 3348 3349 3350 |
/* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) * we keep in write queue in case of a retransmit, as we * also have the SYN packet (with no data) in the same queue. */ TCP_SKB_CB(syn_data)->seq++; TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; if (!err) { |
67da22d23 net-tcp: Fast Ope... |
3351 |
tp->syn_data = (fo->copied > 0); |
75c119afe tcp: implement rb... |
3352 |
tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data); |
f19c29e3e tcp: snmp stats f... |
3353 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); |
783237e8d net-tcp: Fast Ope... |
3354 3355 |
goto done; } |
783237e8d net-tcp: Fast Ope... |
3356 |
|
75c119afe tcp: implement rb... |
3357 3358 |
/* data was not sent, put it in write_queue */ __skb_queue_tail(&sk->sk_write_queue, syn_data); |
b5b7db8d6 tcp: fastopen: fi... |
3359 |
tp->packets_out -= tcp_skb_pcount(syn_data); |
783237e8d net-tcp: Fast Ope... |
3360 3361 3362 3363 3364 3365 3366 |
fallback: /* Send a regular SYN with Fast Open cookie request option */ if (fo->cookie.len > 0) fo->cookie.len = 0; err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); if (err) tp->syn_fastopen = 0; |
783237e8d net-tcp: Fast Ope... |
3367 3368 3369 3370 |
done: fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ return err; } |
67edfef78 TCP: Add comments... |
3371 |
/* Build a SYN and send it off. */ |
1da177e4c Linux-2.6.12-rc2 |
3372 3373 3374 3375 |
int tcp_connect(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; |
ee5868119 network: tcp_conn... |
3376 |
int err; |
1da177e4c Linux-2.6.12-rc2 |
3377 |
|
de525be2c bpf: Support pass... |
3378 |
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); |
8ba609247 tcp: fastopen: tc... |
3379 3380 3381 |
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ |
1da177e4c Linux-2.6.12-rc2 |
3382 |
tcp_connect_init(sk); |
2b9164771 ipv6: adapt conne... |
3383 3384 3385 3386 |
if (unlikely(tp->repair)) { tcp_finish_connect(sk, NULL); return 0; } |
eb9344781 tcp: add a force_... |
3387 |
buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); |
355a901e6 tcp: make connect... |
3388 |
if (unlikely(!buff)) |
1da177e4c Linux-2.6.12-rc2 |
3389 |
return -ENOBUFS; |
a3433f35a tcp: unify tcp fl... |
3390 |
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
9a568de48 tcp: switch TCP T... |
3391 3392 |
tcp_mstamp_refresh(tp); tp->retrans_stamp = tcp_time_stamp(tp); |
783237e8d net-tcp: Fast Ope... |
3393 |
tcp_connect_queue_skb(sk, buff); |
735d38311 tcp: change TCP_E... |
3394 |
tcp_ecn_send_syn(sk, buff); |
75c119afe tcp: implement rb... |
3395 |
tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); |
1da177e4c Linux-2.6.12-rc2 |
3396 |
|
783237e8d net-tcp: Fast Ope... |
3397 3398 3399 |
/* Send off SYN; include data in Fast Open. */ err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); |
ee5868119 network: tcp_conn... |
3400 3401 |
if (err == -ECONNREFUSED) return err; |
bd37a0885 [TCP]: SNMPv2 tcp... |
3402 3403 3404 3405 |
/* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. */ |
e0d694d63 tcp: annotate tp-... |
3406 |
WRITE_ONCE(tp->snd_nxt, tp->write_seq); |
bd37a0885 [TCP]: SNMPv2 tcp... |
3407 |
tp->pushed_seq = tp->write_seq; |
b5b7db8d6 tcp: fastopen: fi... |
3408 3409 |
buff = tcp_send_head(sk); if (unlikely(buff)) { |
e0d694d63 tcp: annotate tp-... |
3410 |
WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq); |
b5b7db8d6 tcp: fastopen: fi... |
3411 3412 |
tp->pushed_seq = TCP_SKB_CB(buff)->seq; } |
81cc8a75d mib: add net to T... |
3413 |
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); |
1da177e4c Linux-2.6.12-rc2 |
3414 3415 |
/* Timer for repeating the SYN until an answer. */ |
3f421baa4 [NET]: Just move ... |
3416 3417 |
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1da177e4c Linux-2.6.12-rc2 |
3418 3419 |
return 0; } |
4bc2f18ba net/ipv4: EXPORT_... |
3420 |
EXPORT_SYMBOL(tcp_connect); |
1da177e4c Linux-2.6.12-rc2 |
3421 3422 3423 3424 3425 3426 3427 |
/* Send out a delayed ack, the caller does the policy checking * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() * for details. */ void tcp_send_delayed_ack(struct sock *sk) { |
463c84b97 [NET]: Introduce ... |
3428 3429 |
struct inet_connection_sock *icsk = inet_csk(sk); int ato = icsk->icsk_ack.ato; |
1da177e4c Linux-2.6.12-rc2 |
3430 3431 3432 |
unsigned long timeout; if (ato > TCP_DELACK_MIN) { |
463c84b97 [NET]: Introduce ... |
3433 |
const struct tcp_sock *tp = tcp_sk(sk); |
056834d9f [TCP]: cleanup tc... |
3434 |
int max_ato = HZ / 2; |
1da177e4c Linux-2.6.12-rc2 |
3435 |
|
31954cd8b tcp: Refactor pin... |
3436 |
if (inet_csk_in_pingpong_mode(sk) || |
056834d9f [TCP]: cleanup tc... |
3437 |
(icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) |
1da177e4c Linux-2.6.12-rc2 |
3438 3439 3440 3441 3442 |
max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ /* If some rtt estimate is known, use it to bound delayed ack. |
463c84b97 [NET]: Introduce ... |
3443 |
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements |
1da177e4c Linux-2.6.12-rc2 |
3444 3445 |
* directly. */ |
740b0f184 tcp: switch rtt e... |
3446 3447 3448 |
if (tp->srtt_us) { int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), TCP_DELACK_MIN); |
1da177e4c Linux-2.6.12-rc2 |
3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 |
if (rtt < max_ato) max_ato = rtt; } ato = min(ato, max_ato); } /* Stay within the limit we were given */ timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ |
463c84b97 [NET]: Introduce ... |
3461 |
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { |
1da177e4c Linux-2.6.12-rc2 |
3462 3463 3464 |
/* If delack timer was blocked or is about to expire, * send ACK now. */ |
463c84b97 [NET]: Introduce ... |
3465 3466 |
if (icsk->icsk_ack.blocked || time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { |
1da177e4c Linux-2.6.12-rc2 |
3467 3468 3469 |
tcp_send_ack(sk); return; } |
463c84b97 [NET]: Introduce ... |
3470 3471 |
if (!time_before(timeout, icsk->icsk_ack.timeout)) timeout = icsk->icsk_ack.timeout; |
1da177e4c Linux-2.6.12-rc2 |
3472 |
} |
463c84b97 [NET]: Introduce ... |
3473 3474 3475 |
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); |
1da177e4c Linux-2.6.12-rc2 |
3476 3477 3478 |
} /* This routine sends an ack and also updates the window. */ |
2987babb6 tcp: helpers to s... |
3479 |
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) |
1da177e4c Linux-2.6.12-rc2 |
3480 |
{ |
058dc3342 [TCP]: reduce tcp... |
3481 |
struct sk_buff *buff; |
1da177e4c Linux-2.6.12-rc2 |
3482 |
|
058dc3342 [TCP]: reduce tcp... |
3483 3484 3485 |
/* If we have been reset, we may not send again. */ if (sk->sk_state == TCP_CLOSE) return; |
1da177e4c Linux-2.6.12-rc2 |
3486 |
|
058dc3342 [TCP]: reduce tcp... |
3487 3488 3489 3490 |
/* We are not putting this on the write queue, so * tcp_transmit_skb() will set the ownership to this * sock. */ |
7450aaf61 tcp: suppress too... |
3491 3492 3493 |
buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); if (unlikely(!buff)) { |
058dc3342 [TCP]: reduce tcp... |
3494 3495 3496 3497 3498 |
inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); return; |
1da177e4c Linux-2.6.12-rc2 |
3499 |
} |
058dc3342 [TCP]: reduce tcp... |
3500 3501 3502 |
/* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); |
a3433f35a tcp: unify tcp fl... |
3503 |
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); |
058dc3342 [TCP]: reduce tcp... |
3504 |
|
987819657 tcp: do not pace ... |
3505 3506 3507 |
/* We do not want pure acks influencing TCP Small Queues or fq/pacing * too much. * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 |
987819657 tcp: do not pace ... |
3508 3509 |
*/ skb_set_tcp_pure_ack(buff); |
058dc3342 [TCP]: reduce tcp... |
3510 |
/* Send it off, this clears delayed acks for us. */ |
2987babb6 tcp: helpers to s... |
3511 3512 |
__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); } |
27cde44a2 tcp: do not cance... |
3513 |
EXPORT_SYMBOL_GPL(__tcp_send_ack); |
2987babb6 tcp: helpers to s... |
3514 3515 3516 3517 |
void tcp_send_ack(struct sock *sk) { __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); |
1da177e4c Linux-2.6.12-rc2 |
3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 |
} /* This routine sends a packet with an out of date sequence * number. It assumes the other end will try to ack it. * * Question: what should we make while urgent mode? * 4.4BSD forces sending single byte of data. We cannot send * out of window data, because we have SND.NXT==SND.MAX... * * Current solution: to send TWO zero-length segments in urgent mode: * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. */ |
e520af48c tcp: add TCPWinPr... |
3531 |
static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) |
1da177e4c Linux-2.6.12-rc2 |
3532 3533 3534 3535 3536 |
{ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ |
7450aaf61 tcp: suppress too... |
3537 3538 |
skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); |
51456b291 ipv4: coding styl... |
3539 |
if (!skb) |
1da177e4c Linux-2.6.12-rc2 |
3540 3541 3542 3543 |
return -1; /* Reserve space for headers and set control bits. */ skb_reserve(skb, MAX_TCP_HEADER); |
1da177e4c Linux-2.6.12-rc2 |
3544 3545 3546 3547 |
/* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just * send it. */ |
a3433f35a tcp: unify tcp fl... |
3548 |
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
e2e8009ff tcp: remove impro... |
3549 |
NET_INC_STATS(sock_net(sk), mib); |
7450aaf61 tcp: suppress too... |
3550 |
return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); |
1da177e4c Linux-2.6.12-rc2 |
3551 |
} |
385e20706 tcp: use tp->tcp_... |
3552 |
/* Called from setsockopt( ... TCP_REPAIR ) */ |
ee9952831 tcp: Initial repa... |
3553 3554 3555 3556 |
void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; |
9a568de48 tcp: switch TCP T... |
3557 |
tcp_mstamp_refresh(tcp_sk(sk)); |
e520af48c tcp: add TCPWinPr... |
3558 |
tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); |
ee9952831 tcp: Initial repa... |
3559 3560 |
} } |
67edfef78 TCP: Add comments... |
3561 |
/* Initiate keepalive or window probe from timer. */ |
e520af48c tcp: add TCPWinPr... |
3562 |
int tcp_write_wakeup(struct sock *sk, int mib) |
1da177e4c Linux-2.6.12-rc2 |
3563 |
{ |
058dc3342 [TCP]: reduce tcp... |
3564 3565 |
struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; |
1da177e4c Linux-2.6.12-rc2 |
3566 |
|
058dc3342 [TCP]: reduce tcp... |
3567 3568 |
if (sk->sk_state == TCP_CLOSE) return -1; |
00db41243 ipv4: coding styl... |
3569 3570 |
skb = tcp_send_head(sk); if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { |
058dc3342 [TCP]: reduce tcp... |
3571 |
int err; |
0c54b85f2 tcp: simplify tcp... |
3572 |
unsigned int mss = tcp_current_mss(sk); |
058dc3342 [TCP]: reduce tcp... |
3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 |
unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; /* We are probing the opening of a window * but the window size is != 0 * must have been a result SWS avoidance ( sender ) */ if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || skb->len > mss) { seg_size = min(seg_size, mss); |
4de075e04 tcp: rename tcp_s... |
3585 |
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; |
75c119afe tcp: implement rb... |
3586 3587 |
if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, skb, seg_size, mss, GFP_ATOMIC)) |
058dc3342 [TCP]: reduce tcp... |
3588 3589 |
return -1; } else if (!tcp_skb_pcount(skb)) |
5bbb432c8 tcp: tcp_set_skb_... |
3590 |
tcp_set_skb_tso_segs(skb, mss); |
058dc3342 [TCP]: reduce tcp... |
3591 |
|
4de075e04 tcp: rename tcp_s... |
3592 |
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; |
058dc3342 [TCP]: reduce tcp... |
3593 3594 3595 3596 3597 |
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) tcp_event_new_data_sent(sk, skb); return err; } else { |
33f5f57ee tcp: kill pointle... |
3598 |
if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) |
e520af48c tcp: add TCPWinPr... |
3599 3600 |
tcp_xmit_probe_skb(sk, 1, mib); return tcp_xmit_probe_skb(sk, 0, mib); |
1da177e4c Linux-2.6.12-rc2 |
3601 |
} |
1da177e4c Linux-2.6.12-rc2 |
3602 3603 3604 3605 3606 3607 3608 |
} /* A window probe timeout has occurred. If window is not closed send * a partial packet else a zero probe. */ void tcp_send_probe0(struct sock *sk) { |
463c84b97 [NET]: Introduce ... |
3609 |
struct inet_connection_sock *icsk = inet_csk(sk); |
1da177e4c Linux-2.6.12-rc2 |
3610 |
struct tcp_sock *tp = tcp_sk(sk); |
c6214a97c ipv4: Namespaceif... |
3611 |
struct net *net = sock_net(sk); |
c1d5674f8 tcp: less aggress... |
3612 |
unsigned long timeout; |
1da177e4c Linux-2.6.12-rc2 |
3613 |
int err; |
e520af48c tcp: add TCPWinPr... |
3614 |
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); |
1da177e4c Linux-2.6.12-rc2 |
3615 |
|
75c119afe tcp: implement rb... |
3616 |
if (tp->packets_out || tcp_write_queue_empty(sk)) { |
1da177e4c Linux-2.6.12-rc2 |
3617 |
/* Cancel probe timer, if it is not required. */ |
6687e988d [ICSK]: Move TCP ... |
3618 |
icsk->icsk_probes_out = 0; |
463c84b97 [NET]: Introduce ... |
3619 |
icsk->icsk_backoff = 0; |
1da177e4c Linux-2.6.12-rc2 |
3620 3621 |
return; } |
c1d5674f8 tcp: less aggress... |
3622 |
icsk->icsk_probes_out++; |
1da177e4c Linux-2.6.12-rc2 |
3623 |
if (err <= 0) { |
c6214a97c ipv4: Namespaceif... |
3624 |
if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) |
463c84b97 [NET]: Introduce ... |
3625 |
icsk->icsk_backoff++; |
c1d5674f8 tcp: less aggress... |
3626 |
timeout = tcp_probe0_when(sk, TCP_RTO_MAX); |
1da177e4c Linux-2.6.12-rc2 |
3627 3628 |
} else { /* If packet was not sent due to local congestion, |
c1d5674f8 tcp: less aggress... |
3629 |
* Let senders fight for local resources conservatively. |
1da177e4c Linux-2.6.12-rc2 |
3630 |
*/ |
c1d5674f8 tcp: less aggress... |
3631 3632 3633 |
timeout = TCP_RESOURCE_PROBE_INTERVAL; } tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL); |
1da177e4c Linux-2.6.12-rc2 |
3634 |
} |
5db92c994 tcp: unify tcp_v4... |
3635 |
|
ea3bea3a1 tcp/dccp: constif... |
3636 |
int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) |
5db92c994 tcp: unify tcp_v4... |
3637 3638 3639 3640 |
{ const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; struct flowi fl; int res; |
58d607d3e tcp: provide skb-... |
3641 |
tcp_rsk(req)->txhash = net_tx_rndhash(); |
b3d051477 tcp: do not mess ... |
3642 |
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL); |
5db92c994 tcp: unify tcp_v4... |
3643 |
if (!res) { |
90bbcc608 net: tcp: rename ... |
3644 |
__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); |
02a1d6e7a net: rename NET_{... |
3645 |
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); |
7e32b4436 tcp: properly acc... |
3646 3647 |
if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; |
cf34ce3da tcp: add tracepoi... |
3648 |
trace_tcp_retransmit_synack(sk, req); |
5db92c994 tcp: unify tcp_v4... |
3649 3650 3651 3652 |
} return res; } EXPORT_SYMBOL(tcp_rtx_synack); |