net/ipv4/tcp_output.c

/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:	Pedro Roque	:	Retransmit queue handled by TCP.
 *				:	Fragmentation on mtu decrease
 *				:	Segment collapse on retransmit
 *				:	AF independence
 *
 *		Linus Torvalds	:	send_delayed_ack
 *		David S. Miller	:	Charge memory using the right skb
 *					during syn/ack processing.
 *		David S. Miller :	Output engine completely rewritten.
 *		Andrea Arcangeli:	SYNACK carry ts_recent in tsecr.
 *		Cacophonix Gaul :	draft-minshall-nagle-01
 *		J Hadi Salim	:	ECN support
 *
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* Default TSQ limit of four TSO segments */
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;

/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */
int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);

/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int prior_packets = tp->packets_out;

	tcp_advance_send_head(sk, skb);
	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;

	tp->packets_out += tcp_skb_pcount(skb);
	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
		tcp_rearm_rto(sk);

	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
		      tcp_skb_pcount(skb));
}

/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
 * window scaling factor due to loss of precision.
 * If window has been shrunk, what should we make? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
 * invalid. OK, let's make this for now:
 */
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
	    (tp->rx_opt.wscale_ok &&
	     ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
		return tp->snd_nxt;
	else
		return tcp_wnd_end(tp);
}

/* Calculate mss to advertise in SYN segment.
 * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
 *
 * 1. It is independent of path mtu.
 * 2. Ideally, it is maximal possible segment size i.e. 65535-40.
 * 3. For IPv4 it is reasonable to calculate it from maximal MTU of
 *    attached devices, because some buggy hosts are confused by
 *    large MSS.
 * 4. We do not make 3, we advertise MSS, calculated from first
 *    hop device mtu, but allow to raise it to ip_rt_min_advmss.
 *    This may be overridden via information stored in routing table.
 * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
 *    probably even Jumbo".
 */
static __u16 tcp_advertise_mss(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct dst_entry *dst = __sk_dst_get(sk);
	int mss = tp->advmss;

	if (dst) {
		unsigned int metric = dst_metric_advmss(dst);

		if (metric < mss) {
			mss = metric;
			tp->advmss = mss;
		}
	}

	return (__u16)mss;
}

/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
 * This is the first part of cwnd validation mechanism.
 */
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
	u32 cwnd = tp->snd_cwnd;

	tcp_ca_event(sk, CA_EVENT_CWND_RESTART);

	tp->snd_ssthresh = tcp_current_ssthresh(sk);
	restart_cwnd = min(restart_cwnd, cwnd);

	while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;
	tp->snd_cwnd = max(cwnd, restart_cwnd);
	tp->snd_cwnd_stamp = tcp_jiffies32;
	tp->snd_cwnd_used = 0;
}
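
/* Worked example (editor's illustration, not part of the original source):
 * with delta == 3 * icsk_rto, snd_cwnd == 32 and restart_cwnd == 10, the
 * loop above halves cwnd once per fully elapsed RTO of idle time,
 * 32 -> 16 -> 8, and the final max() clamps the result back up to
 * restart_cwnd, so the connection restarts with snd_cwnd == 10.
 */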

/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
				struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const u32 now = tcp_jiffies32;

	if (tcp_packets_in_flight(tp) == 0)
		tcp_ca_event(sk, CA_EVENT_TX_START);

	tp->lsndtime = now;

	/* If it is a reply for ato after last received
	 * packet, enter pingpong mode.
	 */
	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
		icsk->icsk_ack.pingpong = 1;
}

/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
				      u32 rcv_nxt)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (unlikely(rcv_nxt != tp->rcv_nxt))
		return;  /* Special ACK sent by DCTCP to reflect ECN */
	tcp_dec_quickack_mode(sk, pkts);
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

u32 tcp_default_init_rwnd(u32 mss)
{
	/* Initial receive window should be twice of TCP_INIT_CWND to
	 * enable proper sending of new unsent data during fast recovery
	 * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
	 * limit when mss is larger than 1460.
	 */
	u32 init_rwnd = TCP_INIT_CWND * 2;

	if (mss > 1460)
		init_rwnd = max((1460 * init_rwnd) / mss, 2U);
	return init_rwnd;
}
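
/* Worked example (editor's illustration, not part of the original source):
 * with TCP_INIT_CWND == 10, a standard mss of 1460 yields init_rwnd == 20
 * segments, while a jumbo mss of 9000 scales it down to
 * max((1460 * 20) / 9000, 2U) == max(3, 2) == 3 segments.
 */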

/* Determine a window scaling and initial window to offer.
 * Based on the assumption that the given amount of space
 * will be offered. Store the results in the tp structure.
 * NOTE: for smooth operation initial space offering should
 * be a multiple of mss if possible. We assume here that mss >= 1.
 * This MUST be enforced by all callers.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int space = (__space < 0 ? 0 : __space);

	/* If no clamp set the clamp to the max possible scaled window */
	if (*window_clamp == 0)
		(*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
	space = min(*window_clamp, space);

	/* Quantize space offering to a multiple of mss if possible. */
	if (space > mss)
		space = rounddown(space, mss);

	/* NOTE: offering an initial window larger than 32767
	 * will break some buggy TCP stacks. If the admin tells us
	 * it is likely we could be speaking with such a buggy stack
	 * we will truncate our initial window offering to 32K-1
	 * unless the remote has sent us a window scaling option,
	 * which we interpret as a sign the remote TCP is not
	 * misinterpreting the window field as a signed quantity.
	 */
	if (sysctl_tcp_workaround_signed_windows)
		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
	else
		(*rcv_wnd) = space;

	(*rcv_wscale) = 0;
	if (wscale_ok) {
		/* Set window scaling on max possible window */
		space = max_t(u32, space, sysctl_tcp_rmem[2]);
		space = max_t(u32, space, sysctl_rmem_max);
		space = min_t(u32, space, *window_clamp);
		while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
			space >>= 1;
			(*rcv_wscale)++;
		}
	}

	if (mss > (1 << *rcv_wscale)) {
		if (!init_rcv_wnd) /* Use default unless specified otherwise */
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	/* Set the clamp no higher than max representable value */
	(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
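
/* Worked example (editor's illustration, not part of the original source):
 * with a 4 MiB receive budget, space == 4194304 stays above U16_MAX
 * through seven halvings (4194304 >> 7 == 32768 <= 65535), so the loop
 * above settles on rcv_wscale == 7 and the peer must shift our 16-bit
 * window field left by 7 bits to recover the real window.
 */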

/* Choose a new window to advertise, update state in tcp_sock for the
 * socket, and return result with RFC1323 scaling applied.  The return
 * value can be stuffed directly into th->window for an outgoing
 * frame.
 */
static u16 tcp_select_window(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 old_win = tp->rcv_wnd;
	u32 cur_win = tcp_receive_window(tp);
	u32 new_win = __tcp_select_window(sk);

	/* Never shrink the offered window */
	if (new_win < cur_win) {
		/* Danger Will Robinson!
		 * Don't update rcv_wup/rcv_wnd here or else
		 * we will not be able to advertise a zero
		 * window in time.  --DaveM
		 *
		 * Relax Will Robinson.
		 */
		if (new_win == 0)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPWANTZEROWINDOWADV);
		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
	}
	tp->rcv_wnd = new_win;
	tp->rcv_wup = tp->rcv_nxt;

	/* Make sure we do not exceed the maximum possible
	 * scaled window.
	 */
	if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
		new_win = min(new_win, MAX_TCP_WINDOW);
	else
		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));

	/* RFC1323 scaling applied */
	new_win >>= tp->rx_opt.rcv_wscale;

	/* If we advertise zero window, disable fast path. */
	if (new_win == 0) {
		tp->pred_flags = 0;
		if (old_win)
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPTOZEROWINDOWADV);
	} else if (old_win == 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
	}

	return new_win;
}
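
/* Worked example (editor's illustration, not part of the original source):
 * with rcv_wscale == 7 the window granularity is 128 bytes, so a cur_win
 * of 12000 that may not shrink is rounded up by the ALIGN() above to
 * 12032 (94 * 128), the next advertisable multiple of the scale factor.
 */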

/* Packet ECN state for a SYN-ACK */
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
	if (!(tp->ecn_flags & TCP_ECN_OK))
		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
	else if (tcp_ca_needs_ecn(sk) ||
		 tcp_bpf_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
}

/* Packet ECN state for a SYN. */
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;

	if (!use_ecn) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
			use_ecn = true;
	}

	tp->ecn_flags = 0;

	if (use_ecn) {
		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
		tp->ecn_flags = TCP_ECN_OK;
		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
			INET_ECN_xmit(sk);
	}
}

static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
		/* tp->ecn_flags are cleared at a later point in time when
		 * SYN ACK is ultimately being received.
		 */
		TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
}

static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
	if (inet_rsk(req)->ecn_ok)
		th->ece = 1;
}

/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
 * be sent.
 */
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
			 struct tcphdr *th, int tcp_header_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->ecn_flags & TCP_ECN_OK) {
		/* Not-retransmitted data segment: set ECT and inject CWR. */
		if (skb->len != tcp_header_len &&
		    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
			INET_ECN_xmit(sk);
			if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
				th->cwr = 1;
				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
			}
		} else if (!tcp_ca_needs_ecn(sk)) {
			/* ACK or retransmitted segment: clear ECT|CE */
			INET_ECN_dontxmit(sk);
		}
		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
			th->ece = 1;
	}
}

/* Constructs common control bits of non-data skb. If SYN/FIN is present,
 * auto increment end seqno.
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	tcp_skb_pcount_set(skb, 1);

	TCP_SKB_CB(skb)->seq = seq;
	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
		seq++;
	TCP_SKB_CB(skb)->end_seq = seq;
}

static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
	return tp->snd_una != tp->snd_up;
}

#define OPTION_SACK_ADVERTISE	(1 << 0)
#define OPTION_TS		(1 << 1)
#define OPTION_MD5		(1 << 2)
#define OPTION_WSCALE		(1 << 3)
#define OPTION_FAST_OPEN_COOKIE	(1 << 8)

struct tcp_out_options {
	u16 options;		/* bit field of OPTION_* */
	u16 mss;		/* 0 to disable */
	u8 ws;			/* window scale, 0 to disable */
	u8 num_sack_blocks;	/* number of SACK blocks to include */
	u8 hash_size;		/* bytes in hash_location */
	__u8 *hash_location;	/* temporary pointer, overloaded */
	__u32 tsval, tsecr;	/* need to include OPTION_TS */
	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
};

/* Write previously computed TCP options to the packet.
 *
 * Beware: Something in the Internet is very sensitive to the ordering of
 * TCP options, we learned this the hard way, so be careful here.
 * Luckily we can at least blame others for their non-compliance, but from
 * an inter-operability perspective it seems that we're somewhat stuck with
 * the ordering which we have been using if we want to keep working with
 * those broken things (not that it currently hurts anybody as there isn't
 * a particular reason why the ordering would need to be changed).
 *
 * At least SACK_PERM as the first option is known to lead to a disaster
 * (but it may well be that other scenarios fail similarly).
 */
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
			      struct tcp_out_options *opts)
{
	u16 options = opts->options;	/* mungable copy */

	if (unlikely(OPTION_MD5 & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* overload cookie hash location */
		opts->hash_location = (__u8 *)ptr;
		ptr += 4;
	}

	if (unlikely(opts->mss)) {
		*ptr++ = htonl((TCPOPT_MSS << 24) |
			       (TCPOLEN_MSS << 16) |
			       opts->mss);
	}

	if (likely(OPTION_TS & options)) {
		if (unlikely(OPTION_SACK_ADVERTISE & options)) {
			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
				       (TCPOLEN_SACK_PERM << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
			options &= ~OPTION_SACK_ADVERTISE;
		} else {
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		}
		*ptr++ = htonl(opts->tsval);
		*ptr++ = htonl(opts->tsecr);
	}

	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK_PERM << 8) |
			       TCPOLEN_SACK_PERM);
	}

	if (unlikely(OPTION_WSCALE & options)) {
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_WINDOW << 16) |
			       (TCPOLEN_WINDOW << 8) |
			       opts->ws);
	}

	if (unlikely(opts->num_sack_blocks)) {
		struct tcp_sack_block *sp = tp->rx_opt.dsack ?
			tp->duplicate_sack : tp->selective_acks;
		int this_sack;

		*ptr++ = htonl((TCPOPT_NOP  << 24) |
			       (TCPOPT_NOP  << 16) |
			       (TCPOPT_SACK <<  8) |
			       (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
						     TCPOLEN_SACK_PERBLOCK)));

		for (this_sack = 0; this_sack < opts->num_sack_blocks;
		     ++this_sack) {
			*ptr++ = htonl(sp[this_sack].start_seq);
			*ptr++ = htonl(sp[this_sack].end_seq);
		}

		tp->rx_opt.dsack = 0;
	}

	if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
		struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
		u8 *p = (u8 *)ptr;
		u32 len; /* Fast Open option length */

		if (foc->exp) {
			len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
			*ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
				     TCPOPT_FASTOPEN_MAGIC);
			p += TCPOLEN_EXP_FASTOPEN_BASE;
		} else {
			len = TCPOLEN_FASTOPEN_BASE + foc->len;
			*p++ = TCPOPT_FASTOPEN;
			*p++ = len;
		}

		memcpy(p, foc->val, foc->len);
		if ((len & 3) == 2) {
			p[foc->len] = TCPOPT_NOP;
			p[foc->len + 1] = TCPOPT_NOP;
		}
		ptr += (len + 3) >> 2;
	}
}
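
/* Wire-format example (editor's illustration, not part of the original
 * source): when timestamps and SACK permission are both being advertised,
 * the combined word written above is
 *	htonl((4 << 24) | (2 << 16) | (8 << 8) | 10)
 * i.e. the bytes 04 02 08 0a on the wire: SACK_PERM (kind 4, length 2)
 * immediately followed by TIMESTAMP (kind 8, length 10), then the
 * 32-bit tsval and tsecr values.
 */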

/* Compute TCP options for SYN packets. This is not the final
 * network wire format yet.
 */
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
				    struct tcp_out_options *opts,
				    struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;
	struct tcp_fastopen_request *fastopen = tp->fastopen_req;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (*md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	/* We always get an MSS option.  The option bytes which will be seen in
	 * normal data packets should timestamps be used, must be in the MSS
	 * advertised.  But we subtract them from tp->mss_cache so that
	 * calculations in tcp_sendmsg are simpler etc.  So account for this
	 * fact here if necessary.  If we don't do this correctly, as a
	 * receiver we won't recognize data packets as being full sized when we
	 * should, and thus we won't abide by the delayed ACK rules correctly.
	 * SACKs don't matter, we never delay an ACK when we have any of those
	 * going out.
	 */
	opts->mss = tcp_advertise_mss(sk);
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
		opts->tsecr = tp->rx_opt.ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
		opts->ws = tp->rx_opt.rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!(OPTION_TS & opts->options)))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}

	if (fastopen && fastopen->cookie.len >= 0) {
		u32 need = fastopen->cookie.len;

		need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
					       TCPOLEN_FASTOPEN_BASE;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = &fastopen->cookie;
			remaining -= need;
			tp->syn_fastopen = 1;
			tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
		}
	}

	return MAX_TCP_OPTION_SPACE - remaining;
}
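
/* Space-accounting example (editor's illustration, not part of the
 * original source): MAX_TCP_OPTION_SPACE is 40 bytes.  A typical SYN
 * consumes 4 (MSS) + 12 (timestamps, which also carry SACK_PERM) +
 * 4 (window scale) = 20 bytes, leaving 20 bytes -- enough for a 16-byte
 * Fast Open cookie, whose option needs (2 + 16 + 3) & ~3 = 20 bytes
 * once aligned.
 */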

/* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(struct request_sock *req,
				       unsigned int mss, struct sk_buff *skb,
				       struct tcp_out_options *opts,
				       const struct tcp_md5sig_key *md5,
				       struct tcp_fastopen_cookie *foc)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	unsigned int remaining = MAX_TCP_OPTION_SPACE;

#ifdef CONFIG_TCP_MD5SIG
	if (md5) {
		opts->options |= OPTION_MD5;
		remaining -= TCPOLEN_MD5SIG_ALIGNED;

		/* We can't fit any SACK blocks in a packet with MD5 + TS
		 * options. There was discussion about disabling SACK
		 * rather than TS in order to fit in better with old,
		 * buggy kernels, but that was deemed to be unnecessary.
		 */
		ireq->tstamp_ok &= !ireq->sack_ok;
	}
#endif

	/* We always send an MSS option. */
	opts->mss = mss;
	remaining -= TCPOLEN_MSS_ALIGNED;

	if (likely(ireq->wscale_ok)) {
		opts->ws = ireq->rcv_wscale;
		opts->options |= OPTION_WSCALE;
		remaining -= TCPOLEN_WSCALE_ALIGNED;
	}
	if (likely(ireq->tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
		opts->tsecr = req->ts_recent;
		remaining -= TCPOLEN_TSTAMP_ALIGNED;
	}
	if (likely(ireq->sack_ok)) {
		opts->options |= OPTION_SACK_ADVERTISE;
		if (unlikely(!ireq->tstamp_ok))
			remaining -= TCPOLEN_SACKPERM_ALIGNED;
	}
	if (foc != NULL && foc->len >= 0) {
		u32 need = foc->len;

		need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
				   TCPOLEN_FASTOPEN_BASE;
		need = (need + 3) & ~3U;  /* Align to 32 bits */
		if (remaining >= need) {
			opts->options |= OPTION_FAST_OPEN_COOKIE;
			opts->fastopen_cookie = foc;
			remaining -= need;
		}
	}

	return MAX_TCP_OPTION_SPACE - remaining;
}

/* Compute TCP options for ESTABLISHED sockets. This is not the
 * final wire format yet.
 */
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
					    struct tcp_out_options *opts,
					    struct tcp_md5sig_key **md5)
{
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned int size = 0;
	unsigned int eff_sacks;

	opts->options = 0;

#ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (unlikely(*md5)) {
		opts->options |= OPTION_MD5;
		size += TCPOLEN_MD5SIG_ALIGNED;
	}
#else
	*md5 = NULL;
#endif

	if (likely(tp->rx_opt.tstamp_ok)) {
		opts->options |= OPTION_TS;
		opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
		opts->tsecr = tp->rx_opt.ts_recent;
		size += TCPOLEN_TSTAMP_ALIGNED;
	}

	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
	if (unlikely(eff_sacks)) {
		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
		opts->num_sack_blocks =
			min_t(unsigned int, eff_sacks,
			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
			      TCPOLEN_SACK_PERBLOCK);
		size += TCPOLEN_SACK_BASE_ALIGNED +
			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
	}

	return size;
}
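
/* Worked example (editor's illustration, not part of the original source):
 * with timestamps on, size starts at 12 bytes, so remaining == 40 - 12 ==
 * 28 and at most (28 - 4) / 8 == 3 SACK blocks fit; advertising all three
 * uses 12 + 4 + 3 * 8 == 40 bytes, exactly MAX_TCP_OPTION_SPACE.
 */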

/* TCP SMALL QUEUES (TSQ)
 *
 * TSQ goal is to keep a small amount of skbs per tcp flow in tx queues
 * (qdisc + dev) to reduce RTT and bufferbloat.
 * We do this using a special skb destructor (tcp_wfree).
 *
 * It's important tcp_wfree() can be replaced by sock_wfree() in the event skb
 * needs to be reallocated in a driver.
 * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
 *
 * Since transmit from skb destructor is forbidden, we use a tasklet
 * to process all sockets that eventually need to send more skbs.
 * We use one tasklet per cpu, with its own queue of sockets.
 */
struct tsq_tasklet {
	struct tasklet_struct	tasklet;
	struct list_head	head; /* queue of tcp sockets */
};
static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);

static void tcp_tsq_handler(struct sock *sk)
{
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->lost_out > tp->retrans_out &&
		    tp->snd_cwnd > tcp_packets_in_flight(tp)) {
			tcp_mstamp_refresh(tp);
			tcp_xmit_retransmit_queue(sk);
		}

		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
			       0, GFP_ATOMIC);
	}
}

/*
 * One tasklet per cpu tries to send more skbs.
 * We run in tasklet context but need to disable irqs when
 * transferring tsq->head because tcp_wfree() might
 * interrupt us (non NAPI drivers)
 */
static void tcp_tasklet_func(unsigned long data)
{
	struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
	LIST_HEAD(list);
	unsigned long flags;
	struct list_head *q, *n;
	struct tcp_sock *tp;
	struct sock *sk;

	local_irq_save(flags);
	list_splice_init(&tsq->head, &list);
	local_irq_restore(flags);

	list_for_each_safe(q, n, &list) {
		tp = list_entry(q, struct tcp_sock, tsq_node);
		list_del(&tp->tsq_node);

		sk = (struct sock *)tp;
		smp_mb__before_atomic();
		clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);

		if (!sk->sk_lock.owned &&
		    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
			bh_lock_sock(sk);
			if (!sock_owned_by_user(sk)) {
				clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
				tcp_tsq_handler(sk);
			}
			bh_unlock_sock(sk);
		}

		sk_free(sk);
	}
}

#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |		\
			  TCPF_WRITE_TIMER_DEFERRED |	\
			  TCPF_DELACK_TIMER_DEFERRED |	\
			  TCPF_MTU_REDUCED_DEFERRED)
/**
 * tcp_release_cb - tcp release_sock() callback
 * @sk: socket
 *
 * called from release_sock() to perform protocol dependent
 * actions before socket release.
 */
void tcp_release_cb(struct sock *sk)
{
	unsigned long flags, nflags;

	/* perform an atomic operation only if at least one flag is set */
	do {
		flags = sk->sk_tsq_flags;
		if (!(flags & TCP_DEFERRED_ALL))
			return;
		nflags = flags & ~TCP_DEFERRED_ALL;
	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);

	if (flags & TCPF_TSQ_DEFERRED)
		tcp_tsq_handler(sk);

	/* Here begins the tricky part :
	 * We are called from release_sock() with :
	 * 1) BH disabled
	 * 2) sk_lock.slock spinlock held
	 * 3) socket owned by us (sk->sk_lock.owned == 1)
	 *
	 * But following code is meant to be called from BH handlers,
	 * so we should keep BH disabled, but early release socket ownership
	 */
	sock_release_ownership(sk);

	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
		tcp_write_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
		tcp_delack_timer_handler(sk);
		__sock_put(sk);
	}
	if (flags & TCPF_MTU_REDUCED_DEFERRED) {
		inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
		__sock_put(sk);
	}
}
EXPORT_SYMBOL(tcp_release_cb);

void __init tcp_tasklet_init(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);

		INIT_LIST_HEAD(&tsq->head);
		tasklet_init(&tsq->tasklet,
			     tcp_tasklet_func,
			     (unsigned long)tsq);
	}
}

/*
 * Write buffer destructor automatically called from kfree_skb.
 * We can't xmit new skbs from this context, as we might already
 * hold qdisc lock.
 */
void tcp_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long flags, nval, oval;

	/* Keep one reference on sk_wmem_alloc.
	 * Will be released by sk_free() from here or tcp_tasklet_func()
	 */
	WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));

	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
	 * Wait until our queues (qdisc + devices) are drained.
	 * This gives :
	 * - less callbacks to tcp_write_xmit(), reducing stress (batches)
	 * - chance for incoming ACK (processed by another cpu maybe)
	 *   to migrate this flow (skb->ooo_okay will be eventually set)
	 */
	if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) &&
	    this_cpu_ksoftirqd() == current)
		goto out;

	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
		struct tsq_tasklet *tsq;
		bool empty;

		if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
			goto out;

		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
		if (nval != oval)
			continue;

		/* queue this socket to tasklet queue */
		local_irq_save(flags);
		tsq = this_cpu_ptr(&tsq_tasklet);
		empty = list_empty(&tsq->head);
		list_add(&tp->tsq_node, &tsq->head);
		if (empty)
			tasklet_schedule(&tsq->tasklet);
		local_irq_restore(flags);
		return;
	}
out:
	sk_free(sk);
}
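
/* Flag-transition example (editor's illustration, not part of the
 * original source): a socket throttled by the output path has
 * TSQF_THROTTLED set in sk_tsq_flags.  When its in-flight skb is freed,
 * the cmpxchg() loop above atomically replaces that state with
 * TSQF_QUEUED | TCPF_TSQ_DEFERRED, links the socket on this cpu's tsq
 * list, and schedules the tasklet, which later clears the bits and
 * calls tcp_tsq_handler() to resume transmission.
 */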

/* Note: Called under hard irq.
 * We can not call TCP stack right away.
 */
enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
{
	struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
	struct sock *sk = (struct sock *)tp;
	unsigned long nval, oval;

	for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
		struct tsq_tasklet *tsq;
		bool empty;

		if (oval & TSQF_QUEUED)
			break;

		nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
		nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
		if (nval != oval)
			continue;

		if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
			break;
		/* queue this socket to tasklet queue */
		tsq = this_cpu_ptr(&tsq_tasklet);
		empty = list_empty(&tsq->head);
		list_add(&tp->tsq_node, &tsq->head);
		if (empty)
			tasklet_schedule(&tsq->tasklet);
		break;
	}
	return HRTIMER_NORESTART;
}

/* BBR congestion control needs pacing.
 * Same remark for SO_MAX_PACING_RATE.
 * sch_fq packet scheduler is efficiently handling pacing,
 * but is not always installed/used.
 * Return true if TCP stack should pace packets itself.
 */
static bool tcp_needs_internal_pacing(const struct sock *sk)
{
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
}

static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
{
	u64 len_ns;
	u32 rate;

	if (!tcp_needs_internal_pacing(sk))
		return;
	rate = sk->sk_pacing_rate;
	if (!rate || rate == ~0U)
		return;

	/* Should account for header sizes as sch_fq does,
	 * but let's make things simple.
	 */
	len_ns = (u64)skb->len * NSEC_PER_SEC;
	do_div(len_ns, rate);
	hrtimer_start(&tcp_sk(sk)->pacing_timer,
		      ktime_add_ns(ktime_get(), len_ns),
		      HRTIMER_MODE_ABS_PINNED);
}
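
/* Pacing-delay example (editor's illustration, not part of the original
 * source): at sk_pacing_rate == 12500000 bytes/sec (100 Mbit/s), each
 * byte costs 80 ns, so a 64 KB TSO skb arms the hrtimer above for
 * 65536 * 80 == 5242880 ns, roughly 5.2 ms after the previous transmit.
 */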

/* This routine actually transmits TCP packets queued in by
 * tcp_do_sendmsg().  This is used by both the initial
 * transmission and possible later retransmissions.
 * All SKB's seen here are completely headerless.  It is our
 * job to build the TCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
			      int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct sk_buff *oskb = NULL;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));
	tp = tcp_sk(sk);

	if (clone_it) {
		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
			- tp->snd_una;
		oskb = skb;
		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}
	skb->skb_mstamp = tp->tcp_mstamp;

	inet = inet_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* if no packet is in qdisc/device queue, then allow XPS to select
	 * another queue. We can be called from tcp_tsq_handler()
	 * which holds one reference to sk_wmem_alloc.
	 *
	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
	 * One way to get this would be to set skb->truesize = 2 on them.
	 */
	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);

	/* If we had to use memory reserve to allocate this skb,
	 * this might cause drops if packet is looped back :
	 * Other socket might not have SOCK_MEMALLOC.
	 * Packets not looped back do not care about pfmemalloc.
	 */
	skb->pfmemalloc = 0;

	skb_push(skb, tcp_header_size);
	skb_reset_transport_header(skb);

	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
	skb_set_hash_from_sk(skb, sk);
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);

	skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);

	/* Build TCP header and checksum it. */
	th = (struct tcphdr *)skb->data;
	th->source		= inet->inet_sport;
	th->dest		= inet->inet_dport;
	th->seq			= htonl(tcb->seq);
	th->ack_seq		= htonl(rcv_nxt);
	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
					tcb->tcp_flags);

	th->check		= 0;
	th->urg_ptr		= 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
		th->window	= htons(tcp_select_window(sk));
		tcp_ecn_send(sk, skb, th, tcp_header_size);
	} else {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	}
#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, skb);
	}
#endif

	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);

	if (skb->len != tcp_header_size) {
		tcp_event_data_sent(tp, sk);
		tp->data_segs_out += tcp_skb_pcount(skb);
		tcp_internal_pacing(sk, skb);
	}

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	tp->segs_out += tcp_skb_pcount(skb);
	/* OK, it's time to fill skb_shinfo(skb)->gso_{segs|size} */
	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

	/* Our usage of tstamp should remain private */
	skb->tstamp = 0;

	/* Cleanup our debris for IP stacks */
	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
			       sizeof(struct inet6_skb_parm)));

	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);

	if (unlikely(err > 0)) {
		tcp_enter_cwr(sk);
		err = net_xmit_eval(err);
	}
	if (!err && oskb) {
		oskb->skb_mstamp = tp->tcp_mstamp;
		tcp_rate_skb_sent(sk, oskb);
	}
	return err;
}
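
/* Header-packing example (editor's illustration, not part of the
 * original source): the write through (((__be16 *)th) + 6) above fills
 * the 16-bit word holding the data offset, reserved bits and flags.
 * For a 40-byte header (20 bytes of options), tcp_header_size >> 2 == 10,
 * so with only ACK (0x10) set the word is (10 << 12) | 0x10, i.e. the
 * bytes 0xa0 0x10 on the wire.
 */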

static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
				  tcp_sk(sk)->rcv_nxt);
}

/* This routine just queues the buffer for sending.
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
 * otherwise socket can stall.
 */
static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Advance write_seq and place onto the write_queue. */
	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
	__skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
}

/* Initialize TSO segments for a packet. */
static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now)
{
	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
		/* Avoid the costly divide in the normal
		 * non-TSO case.
		 */
		tcp_skb_pcount_set(skb, 1);
		TCP_SKB_CB(skb)->tcp_gso_size = 0;
	} else {
		tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now));
		TCP_SKB_CB(skb)->tcp_gso_size = mss_now;
	}
}
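
/* Worked example (editor's illustration, not part of the original source):
 * a 4380-byte skb with mss_now == 1460 gets pcount
 * DIV_ROUND_UP(4380, 1460) == 3 and tcp_gso_size == 1460, so it is
 * charged as three segments against the congestion window while
 * traversing the stack as a single skb.
 */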

/* When a modification to fackets out becomes necessary, we need to check
 * whether skb is counted to fackets_out or not.
 */
static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
				   int decr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->sacked_out || tcp_is_reno(tp))
		return;

	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
		tp->fackets_out -= decr;
}

/* Pcount in the middle of the write queue got changed, we need to do various
 * tweaks to fix counters
 */
static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->packets_out -= decr;

	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
		tp->sacked_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
		tp->retrans_out -= decr;
	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
		tp->lost_out -= decr;

	/* Reno case is special. Sigh... */
	if (tcp_is_reno(tp) && decr > 0)
		tp->sacked_out -= min_t(u32, tp->sacked_out, decr);

	tcp_adjust_fackets_out(sk, skb, decr);

	if (tp->lost_skb_hint &&
	    before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
	    (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
		tp->lost_cnt_hint -= decr;

	tcp_verify_left_out(tp);
}
0a2cf20c3 tcp: remove SKBTX... |
1170 1171 1172 1173 1174 |
static bool tcp_has_tx_tstamp(const struct sk_buff *skb) { return TCP_SKB_CB(skb)->txstamp_ack || (skb_shinfo(skb)->tx_flags & SKBTX_ANY_TSTAMP); } |
490cc7d03 net-timestamp: fi... |
1175 1176 1177 |
static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2) { struct skb_shared_info *shinfo = skb_shinfo(skb); |
0a2cf20c3 tcp: remove SKBTX... |
1178 |
if (unlikely(tcp_has_tx_tstamp(skb)) && |
490cc7d03 net-timestamp: fi... |
1179 1180 1181 1182 1183 1184 1185 |
!before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) { struct skb_shared_info *shinfo2 = skb_shinfo(skb2); u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP; shinfo->tx_flags &= ~tsflags; shinfo2->tx_flags |= tsflags; swap(shinfo->tskey, shinfo2->tskey); |
b51e13faf tcp: Carry txstam... |
1186 1187 |
TCP_SKB_CB(skb2)->txstamp_ack = TCP_SKB_CB(skb)->txstamp_ack; TCP_SKB_CB(skb)->txstamp_ack = 0; |
490cc7d03 net-timestamp: fi... |
1188 1189 |
} } |
a166140e8 tcp: Handle eor b... |
1190 1191 1192 1193 1194 |
static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) { TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor; TCP_SKB_CB(skb)->eor = 0; } |
1da177e4c Linux-2.6.12-rc2 |
1195 1196 |
/* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the |
e905a9eda [NET] IPV4: Fix w... |
1197 |
* packet to the list. This won't be called frequently, I hope. |
1da177e4c Linux-2.6.12-rc2 |
1198 1199 |
* Remember, these are still headerless SKBs at this point. */ |
056834d9f [TCP]: cleanup tc... |
1200 |
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, |
6cc55e096 tcp: add gfp para... |
1201 |
unsigned int mss_now, gfp_t gfp) |
1da177e4c Linux-2.6.12-rc2 |
1202 1203 1204 |
{ struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; |
6475be16f [TCP]: Keep TSO e... |
1205 |
int nsize, old_factor; |
b60b49ea6 [TCP]: Account sk... |
1206 |
int nlen; |
9ce014610 tcp: get rid of t... |
1207 |
u8 flags; |
1da177e4c Linux-2.6.12-rc2 |
1208 |
|
2fceec133 tcp: len check is... |
1209 1210 |
if (WARN_ON(len > skb->len)) return -EINVAL; |
6a438bbe6 [TCP]: speed up S... |
1211 |
|
1da177e4c Linux-2.6.12-rc2 |
1212 1213 1214 |
nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; |
6cc55e096 tcp: add gfp para... |
1215 |
if (skb_unclone(skb, gfp)) |
1da177e4c Linux-2.6.12-rc2 |
1216 1217 1218 |
return -ENOMEM; /* Get a new skb... force flag on. */ |
eb9344781 tcp: add a force_... |
1219 |
buff = sk_stream_alloc_skb(sk, nsize, gfp, true); |
51456b291 ipv4: coding styl... |
1220 |
if (!buff) |
1da177e4c Linux-2.6.12-rc2 |
1221 |
return -ENOMEM; /* We'll just try again later. */ |
ef5cb9738 [TCP]: Fix truesi... |
1222 |
|
3ab224be6 [NET] CORE: Intro... |
1223 1224 |
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); |
b60b49ea6 [TCP]: Account sk... |
1225 1226 1227 |
nlen = skb->len - len - nsize; buff->truesize += nlen; skb->truesize -= nlen; |
1da177e4c Linux-2.6.12-rc2 |
1228 1229 1230 1231 1232 1233 1234 |
/* Correct the sequence numbers. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ |
4de075e04 tcp: rename tcp_s... |
1235 1236 1237 |
flags = TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; |
e14c3caf6 [TCP]: Handle SAC... |
1238 |
TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
a166140e8 tcp: Handle eor b... |
1239 |
tcp_skb_fragment_eor(skb, buff); |
1da177e4c Linux-2.6.12-rc2 |
1240 |
|
84fa7933a [NET]: Replace CH... |
1241 |
if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { |
1da177e4c Linux-2.6.12-rc2 |
1242 |
/* Copy and checksum data tail into the new buffer. */ |
056834d9f [TCP]: cleanup tc... |
1243 1244 |
buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), |
1da177e4c Linux-2.6.12-rc2 |
1245 1246 1247 1248 1249 1250 |
nsize, 0); skb_trim(skb, len); skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { |
84fa7933a [NET]: Replace CH... |
1251 |
skb->ip_summed = CHECKSUM_PARTIAL; |
1da177e4c Linux-2.6.12-rc2 |
1252 1253 1254 1255 |
skb_split(skb, buff, len); } buff->ip_summed = skb->ip_summed; |
a61bbcf28 [NET]: Store skb-... |
1256 |
buff->tstamp = skb->tstamp; |
490cc7d03 net-timestamp: fi... |
1257 |
tcp_fragment_tstamp(skb, buff); |
1da177e4c Linux-2.6.12-rc2 |
1258 |
|
6475be16f [TCP]: Keep TSO e... |
1259 |
old_factor = tcp_skb_pcount(skb); |
1da177e4c Linux-2.6.12-rc2 |
1260 |
/* Fix up tso_factor for both original and new SKB. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1261 1262 |
tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); |
1da177e4c Linux-2.6.12-rc2 |
1263 |
|
b9f64820f tcp: track data d... |
1264 1265 |
/* Update delivered info for the new segment */ TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; |
6475be16f [TCP]: Keep TSO e... |
1266 1267 1268 |
/* If this packet has been sent out already, we must * adjust the various packet counters. */ |
cf0b450cd [TCP]: Fix off by... |
1269 |
if (!before(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) { |
6475be16f [TCP]: Keep TSO e... |
1270 1271 |
int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); |
1da177e4c Linux-2.6.12-rc2 |
1272 |
|
797108d13 tcp: add helper f... |
1273 1274 |
if (diff) tcp_adjust_pcount(sk, skb, diff); |
1da177e4c Linux-2.6.12-rc2 |
1275 1276 1277 |
} /* Link BUFF into the send queue. */ |
f4a775d14 net: introduce __... |
1278 |
__skb_header_release(buff); |
fe067e8ab [TCP]: Abstract o... |
1279 |
tcp_insert_write_queue_after(skb, buff, sk); |
1da177e4c Linux-2.6.12-rc2 |
1280 1281 1282 |
return 0; } |
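/* A userspace sketch of the sequence-number bookkeeping done when a
 * segment is split at `len` bytes, as in tcp_fragment() above; the
 * struct is illustrative and only the seq/end_seq arithmetic is modelled.
 */
#include <assert.h>
#include <stdio.h>

struct seg { unsigned int seq, end_seq; };

static void split_seg(struct seg *skb, struct seg *buff, unsigned int len)
{
	buff->seq = skb->seq + len;	/* new skb starts at the cut point */
	buff->end_seq = skb->end_seq;
	skb->end_seq = buff->seq;	/* old skb now ends at the cut point */
}

int main(void)
{
	struct seg skb = { 1000, 3400 }, buff;

	split_seg(&skb, &buff, 1000);
	assert(skb.end_seq == buff.seq);	/* segments stay contiguous */
	printf("skb [%u,%u) buff [%u,%u)\n",
	       skb.seq, skb.end_seq, buff.seq, buff.end_seq);
	return 0;
}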
f4d016666 tcp: remove unnec... |
1283 1284 |
/* This is similar to __pskb_pull_tail(). The difference is that pulled * data is not copied, but immediately discarded. |
1da177e4c Linux-2.6.12-rc2 |
1285 |
*/ |
7162fb242 tcp: do not under... |
1286 |
static int __pskb_trim_head(struct sk_buff *skb, int len) |
1da177e4c Linux-2.6.12-rc2 |
1287 |
{ |
7b7fc97aa tcp: optimize som... |
1288 |
struct skb_shared_info *shinfo; |
1da177e4c Linux-2.6.12-rc2 |
1289 |
int i, k, eat; |
4fa48bf3c tcp: fix tcp_trim... |
1290 1291 1292 1293 1294 |
eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); len -= eat; if (!len) |
7162fb242 tcp: do not under... |
1295 |
return 0; |
4fa48bf3c tcp: fix tcp_trim... |
1296 |
} |
1da177e4c Linux-2.6.12-rc2 |
1297 1298 |
eat = len; k = 0; |
7b7fc97aa tcp: optimize som... |
1299 1300 1301 |
shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { int size = skb_frag_size(&shinfo->frags[i]); |
9e903e085 net: add skb frag... |
1302 1303 |
if (size <= eat) { |
aff65da0f net: ipv4: conver... |
1304 |
skb_frag_unref(skb, i); |
9e903e085 net: add skb frag... |
1305 |
eat -= size; |
1da177e4c Linux-2.6.12-rc2 |
1306 |
} else { |
7b7fc97aa tcp: optimize som... |
1307 |
shinfo->frags[k] = shinfo->frags[i]; |
1da177e4c Linux-2.6.12-rc2 |
1308 |
if (eat) { |
7b7fc97aa tcp: optimize som... |
1309 1310 |
shinfo->frags[k].page_offset += eat; skb_frag_size_sub(&shinfo->frags[k], eat); |
1da177e4c Linux-2.6.12-rc2 |
1311 1312 1313 1314 1315 |
eat = 0; } k++; } } |
7b7fc97aa tcp: optimize som... |
1316 |
shinfo->nr_frags = k; |
1da177e4c Linux-2.6.12-rc2 |
1317 |
|
1da177e4c Linux-2.6.12-rc2 |
1318 1319 |
skb->data_len -= len; skb->len = skb->data_len; |
7162fb242 tcp: do not under... |
1320 |
return len; |
1da177e4c Linux-2.6.12-rc2 |
1321 |
} |
67edfef78 TCP: Add comments... |
1322 |
/* Remove acked data from a packet in the transmit queue. */ |
1da177e4c Linux-2.6.12-rc2 |
1323 1324 |
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { |
7162fb242 tcp: do not under... |
1325 |
u32 delta_truesize; |
14bbd6a56 net: Add skb_uncl... |
1326 |
if (skb_unclone(skb, GFP_ATOMIC)) |
1da177e4c Linux-2.6.12-rc2 |
1327 |
return -ENOMEM; |
7162fb242 tcp: do not under... |
1328 |
delta_truesize = __pskb_trim_head(skb, len); |
1da177e4c Linux-2.6.12-rc2 |
1329 1330 |
TCP_SKB_CB(skb)->seq += len; |
84fa7933a [NET]: Replace CH... |
1331 |
skb->ip_summed = CHECKSUM_PARTIAL; |
1da177e4c Linux-2.6.12-rc2 |
1332 |
|
7162fb242 tcp: do not under... |
1333 1334 1335 1336 1337 1338 |
if (delta_truesize) { skb->truesize -= delta_truesize; sk->sk_wmem_queued -= delta_truesize; sk_mem_uncharge(sk, delta_truesize); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); } |
1da177e4c Linux-2.6.12-rc2 |
1339 |
|
5b35e1e6e tcp: fix tcp_trim... |
1340 |
/* Any change of skb->len requires recalculation of tso factor. */ |
1da177e4c Linux-2.6.12-rc2 |
1341 |
if (tcp_skb_pcount(skb) > 1) |
5bbb432c8 tcp: tcp_set_skb_... |
1342 |
tcp_set_skb_tso_segs(skb, tcp_skb_mss(skb)); |
1da177e4c Linux-2.6.12-rc2 |
1343 1344 1345 |
return 0; } |
1b63edd6e tcp: fix SYN-data... |
1346 1347 |
/* Calculate MSS not accounting any TCP options. */ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) |
5d424d5a6 [TCP]: MTU probing |
1348 |
{ |
cf533ea53 tcp: add const qu... |
1349 1350 |
const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1351 1352 1353 1354 1355 1356 |
int mss_now; /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); |
674696014 ipv6: RTAX_FEATUR... |
1357 1358 1359 1360 1361 1362 1363 |
/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ if (icsk->icsk_af_ops->net_frag_header_len) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_allfrag(dst)) mss_now -= icsk->icsk_af_ops->net_frag_header_len; } |
5d424d5a6 [TCP]: MTU probing |
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 |
/* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; /* Now subtract optional transport overhead */ mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) mss_now = 48; |
5d424d5a6 [TCP]: MTU probing |
1374 1375 |
return mss_now; } |
1b63edd6e tcp: fix SYN-data... |
1376 1377 1378 1379 1380 1381 1382 |
/* Calculate MSS. Not accounting for SACKs here. */ int tcp_mtu_to_mss(struct sock *sk, int pmtu) { /* Subtract TCP options size, not including SACKs */ return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } |
5d424d5a6 [TCP]: MTU probing |
1383 |
/* Inverse of above */ |
674696014 ipv6: RTAX_FEATUR... |
1384 |
int tcp_mss_to_mtu(struct sock *sk, int mss) |
5d424d5a6 [TCP]: MTU probing |
1385 |
{ |
cf533ea53 tcp: add const qu... |
1386 1387 |
const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1388 1389 1390 1391 1392 1393 |
int mtu; mtu = mss + tp->tcp_header_len + icsk->icsk_ext_hdr_len + icsk->icsk_af_ops->net_header_len; |
674696014 ipv6: RTAX_FEATUR... |
1394 1395 1396 1397 1398 1399 1400 |
/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ if (icsk->icsk_af_ops->net_frag_header_len) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_allfrag(dst)) mtu += icsk->icsk_af_ops->net_frag_header_len; } |
5d424d5a6 [TCP]: MTU probing |
1401 1402 |
return mtu; } |
556c6b46d tcp: export tcp_m... |
1403 |
EXPORT_SYMBOL(tcp_mss_to_mtu); |
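/* A userspace sketch of the MSS<->MTU arithmetic above for the plain
 * IPv4 case, assuming no extension headers, no TCP options and no
 * RTAX_FEATURE_ALLFRAG: just the fixed 20-byte IPv4 and TCP headers.
 */
#include <stdio.h>

#define IP4_HDR	20
#define TCP_HDR	20

static int mtu_to_mss(int pmtu, int mss_clamp)
{
	int mss = pmtu - IP4_HDR - TCP_HDR;

	if (mss > mss_clamp)
		mss = mss_clamp;
	if (mss < 48)		/* room for a full option set + 8 data bytes */
		mss = 48;
	return mss;
}

static int mss_to_mtu(int mss)
{
	return mss + IP4_HDR + TCP_HDR;	/* exact inverse when unclamped */
}

int main(void)
{
	int mss = mtu_to_mss(1500, 65535);

	/* prints: MTU 1500 -> MSS 1460 -> MTU 1500 */
	printf("MTU 1500 -> MSS %d -> MTU %d\n", mss, mss_to_mtu(mss));
	return 0;
}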
5d424d5a6 [TCP]: MTU probing |
1404 |
|
67edfef78 TCP: Add comments... |
1405 |
/* MTU probing init per socket */ |
5d424d5a6 [TCP]: MTU probing |
1406 1407 1408 1409 |
void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); |
b0f9ca53c ipv4: Namespecify... |
1410 |
struct net *net = sock_net(sk); |
5d424d5a6 [TCP]: MTU probing |
1411 |
|
b0f9ca53c ipv4: Namespecify... |
1412 |
icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; |
5d424d5a6 [TCP]: MTU probing |
1413 |
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + |
e905a9eda [NET] IPV4: Fix w... |
1414 |
icsk->icsk_af_ops->net_header_len; |
b0f9ca53c ipv4: Namespecify... |
1415 |
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); |
5d424d5a6 [TCP]: MTU probing |
1416 |
icsk->icsk_mtup.probe_size = 0; |
05cbc0db0 ipv4: Create prob... |
1417 |
if (icsk->icsk_mtup.enabled) |
c74df29a8 tcp: use tcp_jiff... |
1418 |
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; |
5d424d5a6 [TCP]: MTU probing |
1419 |
} |
4bc2f18ba net/ipv4: EXPORT_... |
1420 |
EXPORT_SYMBOL(tcp_mtup_init); |
5d424d5a6 [TCP]: MTU probing |
1421 |
|
1da177e4c Linux-2.6.12-rc2 |
1422 1423 1424 1425 1426 1427 |
/* This function synchronizes snd mss to the current pmtu/exthdr set. tp->rx_opt.user_mss is the mss set by the user via TCP_MAXSEG. It does NOT account for TCP options, but includes only the bare TCP header. tp->rx_opt.mss_clamp is the mss negotiated at connection setup. |
caa20d9ab [TCP]: spelling f... |
1428 |
It is the minimum of user_mss and the mss received with SYN. |
1da177e4c Linux-2.6.12-rc2 |
1429 |
It also does not include TCP options. |
d83d8461f [IP_SOCKGLUE]: Re... |
1430 |
inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. |
1da177e4c Linux-2.6.12-rc2 |
1431 1432 1433 1434 1435 1436 1437 1438 |
tp->mss_cache is current effective sending mss, including all tcp options except for SACKs. It is evaluated, taking into account current pmtu, but never exceeds tp->rx_opt.mss_clamp. NOTE1. rfc1122 clearly states that advertised MSS DOES NOT include either tcp or ip options. |
d83d8461f [IP_SOCKGLUE]: Re... |
1439 1440 |
NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache are READ ONLY outside this function. --ANK (980731) |
1da177e4c Linux-2.6.12-rc2 |
1441 |
*/ |
1da177e4c Linux-2.6.12-rc2 |
1442 1443 1444 |
unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); |
d83d8461f [IP_SOCKGLUE]: Re... |
1445 |
struct inet_connection_sock *icsk = inet_csk(sk); |
5d424d5a6 [TCP]: MTU probing |
1446 |
int mss_now; |
1da177e4c Linux-2.6.12-rc2 |
1447 |
|
5d424d5a6 [TCP]: MTU probing |
1448 1449 |
if (icsk->icsk_mtup.search_high > pmtu) icsk->icsk_mtup.search_high = pmtu; |
1da177e4c Linux-2.6.12-rc2 |
1450 |
|
5d424d5a6 [TCP]: MTU probing |
1451 |
mss_now = tcp_mtu_to_mss(sk, pmtu); |
409d22b47 [TCP]: Code dupli... |
1452 |
mss_now = tcp_bound_to_half_wnd(tp, mss_now); |
1da177e4c Linux-2.6.12-rc2 |
1453 1454 |
/* And store cached results */ |
d83d8461f [IP_SOCKGLUE]: Re... |
1455 |
icsk->icsk_pmtu_cookie = pmtu; |
5d424d5a6 [TCP]: MTU probing |
1456 1457 |
if (icsk->icsk_mtup.enabled) mss_now = min(mss_now, tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low)); |
c1b4a7e69 [TCP]: Move to ne... |
1458 |
tp->mss_cache = mss_now; |
1da177e4c Linux-2.6.12-rc2 |
1459 1460 1461 |
return mss_now; } |
4bc2f18ba net/ipv4: EXPORT_... |
1462 |
EXPORT_SYMBOL(tcp_sync_mss); |
1da177e4c Linux-2.6.12-rc2 |
1463 1464 1465 |
/* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. |
1da177e4c Linux-2.6.12-rc2 |
1466 |
*/ |
0c54b85f2 tcp: simplify tcp... |
1467 |
unsigned int tcp_current_mss(struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
1468 |
{ |
cf533ea53 tcp: add const qu... |
1469 1470 |
const struct tcp_sock *tp = tcp_sk(sk); const struct dst_entry *dst = __sk_dst_get(sk); |
c1b4a7e69 [TCP]: Move to ne... |
1471 |
u32 mss_now; |
95c961747 net: cleanup unsi... |
1472 |
unsigned int header_len; |
33ad798c9 tcp: options clea... |
1473 1474 |
struct tcp_out_options opts; struct tcp_md5sig_key *md5; |
c1b4a7e69 [TCP]: Move to ne... |
1475 1476 |
mss_now = tp->mss_cache; |
1da177e4c Linux-2.6.12-rc2 |
1477 1478 |
if (dst) { u32 mtu = dst_mtu(dst); |
d83d8461f [IP_SOCKGLUE]: Re... |
1479 |
if (mtu != inet_csk(sk)->icsk_pmtu_cookie) |
1da177e4c Linux-2.6.12-rc2 |
1480 1481 |
mss_now = tcp_sync_mss(sk, mtu); } |
33ad798c9 tcp: options clea... |
1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 |
header_len = tcp_established_options(sk, NULL, &opts, &md5) + sizeof(struct tcphdr); /* The mss_cache is sized based on tp->tcp_header_len, which assumes * some common options. If this is an odd packet (because we have SACK * blocks etc) then our calculated header_len will be different, and * we have to adjust mss_now correspondingly */ if (header_len != tp->tcp_header_len) { int delta = (int) header_len - tp->tcp_header_len; mss_now -= delta; } |
cfb6eeb4c [TCP]: MD5 Signat... |
1492 |
|
1da177e4c Linux-2.6.12-rc2 |
1493 1494 |
return mss_now; } |
86fd14ad1 tcp: make tcp_cwn... |
1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 |
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. * As additional protections, we do not touch cwnd in retransmission phases, * and if application hit its sndbuf limit recently. */ static void tcp_cwnd_application_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; } tp->snd_cwnd_used = 0; } |
c2203cf75 tcp: use tcp_jiff... |
1514 |
tp->snd_cwnd_stamp = tcp_jiffies32; |
86fd14ad1 tcp: make tcp_cwn... |
1515 |
} |
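/* A sketch of the RFC 2861 cwnd adjustment above: when the flow was
 * application limited, cwnd decays to the midpoint between itself and
 * the window actually used. Plain ints stand in for tcp_sock fields.
 */
#include <stdio.h>

static unsigned int app_limited_cwnd(unsigned int snd_cwnd,
				     unsigned int win_used)
{
	if (win_used < snd_cwnd)
		snd_cwnd = (snd_cwnd + win_used) >> 1;	/* average of the two */
	return snd_cwnd;
}

int main(void)
{
	/* cwnd 100, but only 20 packets were ever in flight: 100 -> 60 */
	printf("new cwnd = %u\n", app_limited_cwnd(100, 20));
	return 0;
}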
ca8a22634 tcp: make cwnd-li... |
1516 |
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) |
a762a9800 [TCP]: Kill extra... |
1517 |
{ |
1b1fc3fdd tcp: make congest... |
1518 |
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; |
9e412ba76 [TCP]: Sed magic ... |
1519 |
struct tcp_sock *tp = tcp_sk(sk); |
a762a9800 [TCP]: Kill extra... |
1520 |
|
ca8a22634 tcp: make cwnd-li... |
1521 1522 1523 1524 1525 1526 1527 1528 1529 |
/* Track the maximum number of outstanding packets in each * window, and remember whether we were cwnd-limited then. */ if (!before(tp->snd_una, tp->max_packets_seq) || tp->packets_out > tp->max_packets_out) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; } |
e114a710a tcp: fix cwnd lim... |
1530 |
|
249015515 tcp: remove in_fl... |
1531 |
if (tcp_is_cwnd_limited(sk)) { |
a762a9800 [TCP]: Kill extra... |
1532 1533 |
/* Network is fed fully. */ tp->snd_cwnd_used = 0; |
c2203cf75 tcp: use tcp_jiff... |
1534 |
tp->snd_cwnd_stamp = tcp_jiffies32; |
a762a9800 [TCP]: Kill extra... |
1535 1536 1537 1538 |
} else { /* Network starves. */ if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; |
15d33c070 [TCP]: slow_start... |
1539 |
if (sysctl_tcp_slow_start_after_idle && |
c2203cf75 tcp: use tcp_jiff... |
1540 |
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && |
1b1fc3fdd tcp: make congest... |
1541 |
!ca_ops->cong_control) |
a762a9800 [TCP]: Kill extra... |
1542 |
tcp_cwnd_application_limited(sk); |
b0f71bd3e tcp: instrument h... |
1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 |
/* The following conditions together indicate the starvation * is caused by insufficient sender buffer: * 1) just sent some data (see tcp_write_xmit) * 2) not cwnd limited (this else condition) * 3) no more data to send (null tcp_send_head ) * 4) application is hitting buffer limit (SOCK_NOSPACE) */ if (!tcp_send_head(sk) && sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); |
a762a9800 [TCP]: Kill extra... |
1555 1556 |
} } |
d4589926d tcp: refine TSO s... |
1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 |
/* Minshall's variant of the Nagle send check. */ static bool tcp_minshall_check(const struct tcp_sock *tp) { return after(tp->snd_sml, tp->snd_una) && !after(tp->snd_sml, tp->snd_nxt); } /* Update snd_sml if this skb is under mss * Note that a TSO packet might end with a sub-mss segment * The test is really : * if ((skb->len % mss) != 0) * tp->snd_sml = TCP_SKB_CB(skb)->end_seq; * But we can avoid doing the divide again given we already have * skb_pcount = skb->len / mss_now |
0e3a4803a [TCP]: Force TSO ... |
1571 |
*/ |
d4589926d tcp: refine TSO s... |
1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 |
static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, const struct sk_buff *skb) { if (skb->len < tcp_skb_pcount(skb) * mss_now) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } /* Return false, if packet can be sent now without violating Nagle's rules: * 1. It is full sized. (provided by caller in %partial bool) * 2. Or it contains FIN. (already checked by caller) * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. * 4. Or TCP_CORK is not set, and all sent packets are ACKed. * With Minshall's modification: all sent small packets are ACKed. */ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, |
cc93fc51f tcp: delete unuse... |
1587 |
int nonagle) |
d4589926d tcp: refine TSO s... |
1588 1589 1590 1591 1592 |
{ return partial && ((nonagle & TCP_NAGLE_CORK) || (!nonagle && tp->packets_out && tcp_minshall_check(tp))); } |
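/* A userspace sketch of the Nagle/Minshall decision above: a trailing
 * sub-MSS chunk is held back while TCP_CORK is set, or while earlier
 * small packets are still unacknowledged. The booleans are illustrative
 * stand-ins for the nonagle flags and tcp_minshall_check() state.
 */
#include <stdbool.h>
#include <stdio.h>

static bool nagle_defers(bool partial, bool corked, bool nodelay,
			 bool small_pkts_outstanding)
{
	if (!partial)		/* full-sized chunk: always sendable */
		return false;
	if (corked)		/* TCP_CORK: hold partial frames */
		return true;
	if (nodelay)		/* TCP_NODELAY: never defer */
		return false;
	return small_pkts_outstanding;	/* Minshall: wait for ACK of prior runt */
}

int main(void)
{
	printf("defer=%d\n", nagle_defers(true, false, false, true)); /* 1 */
	printf("defer=%d\n", nagle_defers(true, false, true, true));  /* 0 */
	return 0;
}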
605ad7f18 tcp: refine TSO a... |
1593 1594 1595 1596 |
/* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ |
1b3878ca1 tcp: export tcp_t... |
1597 1598 |
u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs) |
605ad7f18 tcp: refine TSO a... |
1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 |
{ u32 bytes, segs; bytes = min(sk->sk_pacing_rate >> 10, sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, * not one big TSO packet every 100 ms. * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ |
1b3878ca1 tcp: export tcp_t... |
1610 |
segs = max_t(u32, bytes / mss_now, min_tso_segs); |
605ad7f18 tcp: refine TSO a... |
1611 |
|
d6a76199e tcp_bbr: better d... |
1612 |
return segs; |
605ad7f18 tcp: refine TSO a... |
1613 |
} |
1b3878ca1 tcp: export tcp_t... |
1614 |
EXPORT_SYMBOL(tcp_tso_autosize); |
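/* A sketch of the autosizing arithmetic above: >> 10 divides the pacing
 * rate (bytes/sec) by 1024, i.e. roughly the bytes sent in one
 * millisecond, then expresses that budget in MSS-sized segments. The
 * gso_max_size clamp from the kernel code is omitted here.
 */
#include <stdio.h>

static unsigned int tso_autosize(unsigned long pacing_rate,
				 unsigned int mss_now,
				 unsigned int min_tso_segs)
{
	unsigned long bytes = pacing_rate >> 10;	/* ~1 ms worth of data */
	unsigned long segs = bytes / mss_now;

	return segs > min_tso_segs ? (unsigned int)segs : min_tso_segs;
}

int main(void)
{
	/* 125 MB/s (~1 Gbit/s) pacing, MSS 1448: ~84 segments per TSO skb */
	printf("segs = %u\n", tso_autosize(125000000UL, 1448, 2));
	return 0;
}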
605ad7f18 tcp: refine TSO a... |
1615 |
|
ed6e7268b tcp: allow conges... |
1616 1617 1618 1619 1620 1621 1622 |
/* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. */ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; |
d6a76199e tcp_bbr: better d... |
1623 1624 1625 1626 |
if (!tso_segs) tso_segs = tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); |
ed6e7268b tcp: allow conges... |
1627 |
} |
d4589926d tcp: refine TSO s... |
1628 1629 1630 1631 1632 1633 |
/* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, unsigned int mss_now, unsigned int max_segs, int nonagle) |
c1b4a7e69 [TCP]: Move to ne... |
1634 |
{ |
cf533ea53 tcp: add const qu... |
1635 |
const struct tcp_sock *tp = tcp_sk(sk); |
d4589926d tcp: refine TSO s... |
1636 |
u32 partial, needed, window, max_len; |
c1b4a7e69 [TCP]: Move to ne... |
1637 |
|
90840defa [TCP]: Introduce ... |
1638 |
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
1485348d2 tcp: Apply device... |
1639 |
max_len = mss_now * max_segs; |
0e3a4803a [TCP]: Force TSO ... |
1640 |
|
1485348d2 tcp: Apply device... |
1641 1642 |
if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) return max_len; |
0e3a4803a [TCP]: Force TSO ... |
1643 |
|
5ea3a7480 [TCP]: Prevent se... |
1644 |
needed = min(skb->len, window); |
1485348d2 tcp: Apply device... |
1645 1646 |
if (max_len <= needed) return max_len; |
0e3a4803a [TCP]: Force TSO ... |
1647 |
|
d4589926d tcp: refine TSO s... |
1648 1649 1650 1651 1652 |
partial = needed % mss_now; /* If last segment is not a full MSS, check if Nagle rules allow us * to include this last segment in this skb. * Otherwise, we'll split the skb at last MSS boundary */ |
cc93fc51f tcp: delete unuse... |
1653 |
if (tcp_nagle_check(partial != 0, tp, nonagle)) |
d4589926d tcp: refine TSO s... |
1654 1655 1656 |
return needed - partial; return needed; |
c1b4a7e69 [TCP]: Move to ne... |
1657 1658 1659 1660 1661 |
} /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ |
cf533ea53 tcp: add const qu... |
1662 1663 |
static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb) |
c1b4a7e69 [TCP]: Move to ne... |
1664 |
{ |
d649a7a81 tcp: limit GSO pa... |
1665 |
u32 in_flight, cwnd, halfcwnd; |
c1b4a7e69 [TCP]: Move to ne... |
1666 1667 |
/* Don't be strict about the congestion window for the final FIN. */ |
4de075e04 tcp: rename tcp_s... |
1668 1669 |
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) |
c1b4a7e69 [TCP]: Move to ne... |
1670 1671 1672 1673 |
return 1; in_flight = tcp_packets_in_flight(tp); cwnd = tp->snd_cwnd; |
d649a7a81 tcp: limit GSO pa... |
1674 1675 |
if (in_flight >= cwnd) return 0; |
c1b4a7e69 [TCP]: Move to ne... |
1676 |
|
d649a7a81 tcp: limit GSO pa... |
1677 1678 1679 1680 1681 |
/* For better scheduling, ensure we have at least * 2 GSO packets in flight. */ halfcwnd = max(cwnd >> 1, 1U); return min(halfcwnd, cwnd - in_flight); |
c1b4a7e69 [TCP]: Move to ne... |
1682 |
} |
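/* A sketch of the cwnd quota computed above: send up to the unused part
 * of the congestion window, but never more than half the window at once,
 * so that at least two GSO packets can be in flight for scheduling.
 */
#include <stdio.h>

static unsigned int cwnd_quota(unsigned int cwnd, unsigned int in_flight)
{
	unsigned int halfcwnd, room;

	if (in_flight >= cwnd)
		return 0;			/* window is full */
	halfcwnd = cwnd > 1 ? cwnd >> 1 : 1;	/* max(cwnd/2, 1) */
	room = cwnd - in_flight;
	return halfcwnd < room ? halfcwnd : room;
}

int main(void)
{
	printf("quota = %u\n", cwnd_quota(10, 3));	/* min(5, 7) = 5 */
	printf("quota = %u\n", cwnd_quota(10, 8));	/* min(5, 2) = 2 */
	return 0;
}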
b595076a1 tree-wide: fix co... |
1683 |
/* Initialize TSO state of a skb. |
67edfef78 TCP: Add comments... |
1684 |
* This must be invoked the first time we consider transmitting |
c1b4a7e69 [TCP]: Move to ne... |
1685 1686 |
* SKB onto the wire. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1687 |
static int tcp_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) |
c1b4a7e69 [TCP]: Move to ne... |
1688 1689 |
{ int tso_segs = tcp_skb_pcount(skb); |
f8269a495 tcp: make urg+gso... |
1690 |
if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { |
5bbb432c8 tcp: tcp_set_skb_... |
1691 |
tcp_set_skb_tso_segs(skb, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
1692 1693 1694 1695 |
tso_segs = tcp_skb_pcount(skb); } return tso_segs; } |
c1b4a7e69 [TCP]: Move to ne... |
1696 |
|
a2a385d62 tcp: bool convers... |
1697 |
/* Return true if the Nagle test allows this packet to be |
c1b4a7e69 [TCP]: Move to ne... |
1698 1699 |
* sent now. */ |
a2a385d62 tcp: bool convers... |
1700 1701 |
static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss, int nonagle) |
c1b4a7e69 [TCP]: Move to ne... |
1702 1703 1704 1705 1706 1707 1708 1709 |
{ /* Nagle rule does not apply to frames, which sit in the middle of the * write_queue (they have no chances to get new data). * * This is implemented in the callers, where they modify the 'nonagle' * argument based upon the location of SKB in the send queue. */ if (nonagle & TCP_NAGLE_PUSH) |
a2a385d62 tcp: bool convers... |
1710 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1711 |
|
9b44190dc tcp: refactor F-RTO |
1712 1713 |
/* Don't use the nagle rule for urgent data (or for the final FIN). */ if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) |
a2a385d62 tcp: bool convers... |
1714 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1715 |
|
cc93fc51f tcp: delete unuse... |
1716 |
if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle)) |
a2a385d62 tcp: bool convers... |
1717 |
return true; |
c1b4a7e69 [TCP]: Move to ne... |
1718 |
|
a2a385d62 tcp: bool convers... |
1719 |
return false; |
c1b4a7e69 [TCP]: Move to ne... |
1720 1721 1722 |
} /* Does at least the first segment of SKB fit into the send window? */ |
a2a385d62 tcp: bool convers... |
1723 1724 1725 |
static bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss) |
c1b4a7e69 [TCP]: Move to ne... |
1726 1727 1728 1729 1730 |
{ u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (skb->len > cur_mss) end_seq = TCP_SKB_CB(skb)->seq + cur_mss; |
90840defa [TCP]: Introduce ... |
1731 |
return !after(end_seq, tcp_wnd_end(tp)); |
c1b4a7e69 [TCP]: Move to ne... |
1732 |
} |
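/* A sketch of the wrap-safe window check above: after()/before() reduce
 * to a signed 32-bit difference, so the test keeps working across
 * sequence-number wraparound. Helper names here are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;	/* wrap-safe "a is later than b" */
}

static bool fits_in_window(uint32_t end_seq, uint32_t wnd_end)
{
	return !seq_after(end_seq, wnd_end);
}

int main(void)
{
	/* an end_seq just past a wrapped window edge is correctly rejected */
	printf("%d\n", fits_in_window(0x00000010u, 0xfffffff0u)); /* 0 */
	printf("%d\n", fits_in_window(0xfffffff0u, 0x00000010u)); /* 1 */
	return 0;
}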
c1b4a7e69 [TCP]: Move to ne... |
1733 1734 1735 1736 1737 1738 1739 |
/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet * which is put after SKB on the list. It is very much like * tcp_fragment() except that it may make several kinds of assumptions * in order to speed up the splitting operation. In particular, we * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ |
056834d9f [TCP]: cleanup tc... |
1740 |
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, |
c4ead4c59 tcp: tso_fragment... |
1741 |
unsigned int mss_now, gfp_t gfp) |
c1b4a7e69 [TCP]: Move to ne... |
1742 1743 1744 |
{ struct sk_buff *buff; int nlen = skb->len - len; |
9ce014610 tcp: get rid of t... |
1745 |
u8 flags; |
c1b4a7e69 [TCP]: Move to ne... |
1746 1747 |
/* All of a TSO frame must be composed of paged data. */ |
c8ac37746 [TCP]: Fix bug #5... |
1748 |
if (skb->len != skb->data_len) |
6cc55e096 tcp: add gfp para... |
1749 |
return tcp_fragment(sk, skb, len, mss_now, gfp); |
c1b4a7e69 [TCP]: Move to ne... |
1750 |
|
eb9344781 tcp: add a force_... |
1751 |
buff = sk_stream_alloc_skb(sk, 0, gfp, true); |
51456b291 ipv4: coding styl... |
1752 |
if (unlikely(!buff)) |
c1b4a7e69 [TCP]: Move to ne... |
1753 |
return -ENOMEM; |
3ab224be6 [NET] CORE: Intro... |
1754 1755 |
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); |
b60b49ea6 [TCP]: Account sk... |
1756 |
buff->truesize += nlen; |
c1b4a7e69 [TCP]: Move to ne... |
1757 1758 1759 1760 1761 1762 1763 1764 |
skb->truesize -= nlen; /* Correct the sequence numbers. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len; TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; /* PSH and FIN should only be set in the second packet. */ |
4de075e04 tcp: rename tcp_s... |
1765 1766 1767 |
flags = TCP_SKB_CB(skb)->tcp_flags; TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->tcp_flags = flags; |
c1b4a7e69 [TCP]: Move to ne... |
1768 1769 1770 |
/* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; |
a166140e8 tcp: Handle eor b... |
1771 |
tcp_skb_fragment_eor(skb, buff); |
84fa7933a [NET]: Replace CH... |
1772 |
buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; |
c1b4a7e69 [TCP]: Move to ne... |
1773 |
skb_split(skb, buff, len); |
490cc7d03 net-timestamp: fi... |
1774 |
tcp_fragment_tstamp(skb, buff); |
c1b4a7e69 [TCP]: Move to ne... |
1775 1776 |
/* Fix up tso_factor for both original and new SKB. */ |
5bbb432c8 tcp: tcp_set_skb_... |
1777 1778 |
tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
1779 1780 |
/* Link BUFF into the send queue. */ |
f4a775d14 net: introduce __... |
1781 |
__skb_header_release(buff); |
fe067e8ab [TCP]: Abstract o... |
1782 |
tcp_insert_write_queue_after(skb, buff, sk); |
c1b4a7e69 [TCP]: Move to ne... |
1783 1784 1785 1786 1787 1788 1789 1790 1791 |
return 0; } /* Try to defer sending, if possible, in order to minimize the amount * of TSO splitting we do. View it as a kind of TSO Nagle test. * * This algorithm is from John Heffner. */ |
ca8a22634 tcp: make cwnd-li... |
1792 |
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, |
605ad7f18 tcp: refine TSO a... |
1793 |
bool *is_cwnd_limited, u32 max_segs) |
c1b4a7e69 [TCP]: Move to ne... |
1794 |
{ |
6687e988d [ICSK]: Move TCP ... |
1795 |
const struct inet_connection_sock *icsk = inet_csk(sk); |
50c8339e9 tcp: tso: restore... |
1796 1797 |
u32 age, send_win, cong_win, limit, in_flight; struct tcp_sock *tp = tcp_sk(sk); |
50c8339e9 tcp: tso: restore... |
1798 |
struct sk_buff *head; |
ad9f4f50f tcp: avoid a poss... |
1799 |
int win_divisor; |
c1b4a7e69 [TCP]: Move to ne... |
1800 |
|
4de075e04 tcp: rename tcp_s... |
1801 |
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) |
ae8064ac3 [TCP]: Bound TSO ... |
1802 |
goto send_now; |
c1b4a7e69 [TCP]: Move to ne... |
1803 |
|
99d7662a0 tcp: tso: allow d... |
1804 |
if (icsk->icsk_ca_state >= TCP_CA_Recovery) |
ae8064ac3 [TCP]: Bound TSO ... |
1805 |
goto send_now; |
5f852eb53 tcp: tso: remove ... |
1806 1807 1808 |
/* Avoid bursty behavior by allowing defer * only if the last write was recent. */ |
d635fbe27 tcp: use tcp_jiff... |
1809 |
if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0) |
ae8064ac3 [TCP]: Bound TSO ... |
1810 |
goto send_now; |
908a75c17 [TCP]: Never TSO ... |
1811 |
|
c1b4a7e69 [TCP]: Move to ne... |
1812 |
in_flight = tcp_packets_in_flight(tp); |
056834d9f [TCP]: cleanup tc... |
1813 |
BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); |
c1b4a7e69 [TCP]: Move to ne... |
1814 |
|
90840defa [TCP]: Introduce ... |
1815 |
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
c1b4a7e69 [TCP]: Move to ne... |
1816 1817 1818 1819 1820 |
/* From in_flight test above, we know that cwnd > in_flight. */ cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; limit = min(send_win, cong_win); |
ba244fe90 [TCP]: Fix tcp_ts... |
1821 |
/* If a full-sized TSO skb can be sent, do it. */ |
605ad7f18 tcp: refine TSO a... |
1822 |
if (limit >= max_segs * tp->mss_cache) |
ae8064ac3 [TCP]: Bound TSO ... |
1823 |
goto send_now; |
ba244fe90 [TCP]: Fix tcp_ts... |
1824 |
|
62ad27619 tcp: deferring in... |
1825 1826 1827 |
/* Middle in queue won't get any more data, full sendable already? */ if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; |
ad9f4f50f tcp: avoid a poss... |
1828 1829 |
win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); if (win_divisor) { |
c1b4a7e69 [TCP]: Move to ne... |
1830 1831 1832 1833 1834 |
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); /* If at least some fraction of a window is available, * just use it. */ |
ad9f4f50f tcp: avoid a poss... |
1835 |
chunk /= win_divisor; |
c1b4a7e69 [TCP]: Move to ne... |
1836 |
if (limit >= chunk) |
ae8064ac3 [TCP]: Bound TSO ... |
1837 |
goto send_now; |
c1b4a7e69 [TCP]: Move to ne... |
1838 1839 1840 1841 1842 1843 |
} else { /* Different approach, try not to defer past a single * ACK. Receiver should ACK every other full sized * frame, so if we have space for more than 3 frames * then send now. */ |
6b5a5c0db tcp: do not scale... |
1844 |
if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) |
ae8064ac3 [TCP]: Bound TSO ... |
1845 |
goto send_now; |
c1b4a7e69 [TCP]: Move to ne... |
1846 |
} |
50c8339e9 tcp: tso: restore... |
1847 |
head = tcp_write_queue_head(sk); |
385e20706 tcp: use tp->tcp_... |
1848 |
|
9a568de48 tcp: switch TCP T... |
1849 |
age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); |
50c8339e9 tcp: tso: restore... |
1850 1851 1852 |
/* If next ACK is likely to come too late (half srtt), do not defer */ if (age < (tp->srtt_us >> 4)) goto send_now; |
5f852eb53 tcp: tso: remove ... |
1853 |
/* Ok, it looks like it is advisable to defer. */ |
ae8064ac3 [TCP]: Bound TSO ... |
1854 |
|
d2e1339f4 tcp: Fix CWV bein... |
1855 |
if (cong_win < send_win && cong_win <= skb->len) |
ca8a22634 tcp: make cwnd-li... |
1856 |
*is_cwnd_limited = true; |
a2a385d62 tcp: bool convers... |
1857 |
return true; |
ae8064ac3 [TCP]: Bound TSO ... |
1858 1859 |
send_now: |
a2a385d62 tcp: bool convers... |
1860 |
return false; |
c1b4a7e69 [TCP]: Move to ne... |
1861 |
} |
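/* A sketch of the deferral arithmetic above: the sendable limit is the
 * smaller of the receive window and the unused congestion window, both
 * in bytes; deferring is only considered while that limit stays below a
 * full-sized TSO burst. Parameters stand in for tcp_sock fields.
 */
#include <stdio.h>

static unsigned int sendable_limit(unsigned int send_win,
				   unsigned int cwnd, unsigned int in_flight,
				   unsigned int mss)
{
	unsigned int cong_win = (cwnd - in_flight) * mss;

	return send_win < cong_win ? send_win : cong_win;
}

int main(void)
{
	/* cwnd 10, 7 in flight, MSS 1448: cong_win 4344 caps a 64 KB rwnd */
	printf("limit = %u\n", sendable_limit(65536, 10, 7, 1448));
	return 0;
}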
05cbc0db0 ipv4: Create prob... |
1862 1863 1864 1865 1866 1867 1868 1869 1870 |
static inline void tcp_mtu_check_reprobe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); u32 interval; s32 delta; interval = net->ipv4.sysctl_tcp_probe_interval; |
c74df29a8 tcp: use tcp_jiff... |
1871 |
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp; |
05cbc0db0 ipv4: Create prob... |
1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 |
if (unlikely(delta >= interval * HZ)) { int mss = tcp_current_mss(sk); /* Update current search range */ icsk->icsk_mtup.probe_size = 0; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); /* Update probe time stamp */ |
c74df29a8 tcp: use tcp_jiff... |
1883 |
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; |
05cbc0db0 ipv4: Create prob... |
1884 1885 |
} } |
17634603d tcp: Honor the eo... |
1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 |
static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) { struct sk_buff *skb, *next; skb = tcp_send_head(sk); tcp_for_write_queue_from_safe(skb, next, sk) { if (len <= skb->len) break; if (unlikely(TCP_SKB_CB(skb)->eor)) return false; len -= skb->len; } return true; } |
5d424d5a6 [TCP]: MTU probing |
1903 |
/* Create a new MTU probe if we are ready. |
67edfef78 TCP: Add comments... |
1904 1905 1906 1907 |
* MTU probe is regularly attempting to increase the path MTU by * deliberately sending larger packets. This discovers routing * changes resulting in larger path MTUs. * |
5d424d5a6 [TCP]: MTU probing |
1908 1909 |
* Returns 0 if we should wait to probe (no cwnd available), * 1 if a probe was sent, |
056834d9f [TCP]: cleanup tc... |
1910 1911 |
* -1 otherwise */ |
5d424d5a6 [TCP]: MTU probing |
1912 1913 |
static int tcp_mtu_probe(struct sock *sk) { |
5d424d5a6 [TCP]: MTU probing |
1914 |
struct inet_connection_sock *icsk = inet_csk(sk); |
12a59abc2 tcp: tcp_mtu_prob... |
1915 |
struct tcp_sock *tp = tcp_sk(sk); |
5d424d5a6 [TCP]: MTU probing |
1916 |
struct sk_buff *skb, *nskb, *next; |
6b58e0a5f ipv4: Use binary ... |
1917 |
struct net *net = sock_net(sk); |
5d424d5a6 [TCP]: MTU probing |
1918 |
int probe_size; |
91cc17c0e [TCP]: MTUprobe: ... |
1919 |
int size_needed; |
12a59abc2 tcp: tcp_mtu_prob... |
1920 |
int copy, len; |
5d424d5a6 [TCP]: MTU probing |
1921 |
int mss_now; |
6b58e0a5f ipv4: Use binary ... |
1922 |
int interval; |
5d424d5a6 [TCP]: MTU probing |
1923 1924 1925 1926 |
/* Not currently probing/verifying, * not in recovery, * have enough cwnd, and |
12a59abc2 tcp: tcp_mtu_prob... |
1927 1928 1929 1930 1931 1932 1933 |
* not SACKing (the variable headers throw things off) */ if (likely(!icsk->icsk_mtup.enabled || icsk->icsk_mtup.probe_size || inet_csk(sk)->icsk_ca_state != TCP_CA_Open || tp->snd_cwnd < 11 || tp->rx_opt.num_sacks || tp->rx_opt.dsack)) |
5d424d5a6 [TCP]: MTU probing |
1934 |
return -1; |
6b58e0a5f ipv4: Use binary ... |
1935 1936 1937 1938 |
/* Use binary search for probe_size between tcp_mss_base * and the current mss_clamp. If (search_high - search_low) is * smaller than a threshold, back off from probing. */ |
0c54b85f2 tcp: simplify tcp... |
1939 |
mss_now = tcp_current_mss(sk); |
6b58e0a5f ipv4: Use binary ... |
1940 1941 |
probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high + icsk->icsk_mtup.search_low) >> 1); |
91cc17c0e [TCP]: MTUprobe: ... |
1942 |
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; |
6b58e0a5f ipv4: Use binary ... |
1943 |
interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low; |
05cbc0db0 ipv4: Create prob... |
1944 1945 1946 1947 |
/* When misfortune happens, we are reprobing actively, * and the reprobe timer has expired. We stick with the current * probing process by not resetting the search range to its original. */ |
6b58e0a5f ipv4: Use binary ... |
1948 |
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) || |
05cbc0db0 ipv4: Create prob... |
1949 1950 1951 1952 1953 |
interval < net->ipv4.sysctl_tcp_probe_threshold) { /* Check whether enough time has elapsed for * another round of probing. */ tcp_mtu_check_reprobe(sk); |
5d424d5a6 [TCP]: MTU probing |
1954 1955 1956 1957 |
return -1; } /* Have enough data in the send queue to probe? */ |
7f9c33e51 [TCP] MTUprobe: C... |
1958 |
if (tp->write_seq - tp->snd_nxt < size_needed) |
5d424d5a6 [TCP]: MTU probing |
1959 |
return -1; |
91cc17c0e [TCP]: MTUprobe: ... |
1960 1961 |
if (tp->snd_wnd < size_needed) return -1; |
90840defa [TCP]: Introduce ... |
1962 |
if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp))) |
91cc17c0e [TCP]: MTUprobe: ... |
1963 |
return 0; |
5d424d5a6 [TCP]: MTU probing |
1964 |
|
d67c58e9a [TCP]: Remove loc... |
1965 1966 1967 |
/* Do we need to wait to drain cwnd? With none in flight, don't stall */ if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) { if (!tcp_packets_in_flight(tp)) |
5d424d5a6 [TCP]: MTU probing |
1968 1969 1970 1971 |
return -1; else return 0; } |
17634603d tcp: Honor the eo... |
1972 1973 |
if (!tcp_can_coalesce_send_queue_head(sk, probe_size)) return -1; |
5d424d5a6 [TCP]: MTU probing |
1974 |
/* We're allowed to probe. Build it now. */ |
eb9344781 tcp: add a force_... |
1975 |
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); |
51456b291 ipv4: coding styl... |
1976 |
if (!nskb) |
5d424d5a6 [TCP]: MTU probing |
1977 |
return -1; |
3ab224be6 [NET] CORE: Intro... |
1978 1979 |
sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); |
5d424d5a6 [TCP]: MTU probing |
1980 |
|
fe067e8ab [TCP]: Abstract o... |
1981 |
skb = tcp_send_head(sk); |
5d424d5a6 [TCP]: MTU probing |
1982 1983 1984 |
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; |
4de075e04 tcp: rename tcp_s... |
1985 |
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; |
5d424d5a6 [TCP]: MTU probing |
1986 1987 |
TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; |
84fa7933a [NET]: Replace CH... |
1988 |
nskb->ip_summed = skb->ip_summed; |
5d424d5a6 [TCP]: MTU probing |
1989 |
|
50c4817e9 [TCP]: MTUprobe: ... |
1990 |
tcp_insert_write_queue_before(nskb, skb, sk); |
2b7cda9c3 tcp: fix tcp_mtu_... |
1991 |
tcp_highest_sack_replace(sk, skb, nskb); |
50c4817e9 [TCP]: MTUprobe: ... |
1992 |
|
5d424d5a6 [TCP]: MTU probing |
1993 |
len = 0; |
234b68607 [TCP]: Add tcp_fo... |
1994 |
tcp_for_write_queue_from_safe(skb, next, sk) { |
5d424d5a6 [TCP]: MTU probing |
1995 |
copy = min_t(int, skb->len, probe_size - len); |
2fe664f1f tcp: fix wrong ch... |
1996 |
if (nskb->ip_summed) { |
5d424d5a6 [TCP]: MTU probing |
1997 |
skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); |
2fe664f1f tcp: fix wrong ch... |
1998 1999 2000 2001 2002 2003 |
} else { __wsum csum = skb_copy_and_csum_bits(skb, 0, skb_put(nskb, copy), copy, 0); nskb->csum = csum_block_add(nskb->csum, csum, len); } |
5d424d5a6 [TCP]: MTU probing |
2004 2005 2006 2007 |
if (skb->len <= copy) { /* We've eaten all the data from this skb. * Throw it away. */ |
4de075e04 tcp: rename tcp_s... |
2008 |
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; |
17634603d tcp: Honor the eo... |
2009 2010 2011 2012 |
/* If this is the last SKB we copy and eor is set * we need to propagate it to the new skb. */ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; |
fe067e8ab [TCP]: Abstract o... |
2013 |
tcp_unlink_write_queue(skb, sk); |
3ab224be6 [NET] CORE: Intro... |
2014 |
sk_wmem_free_skb(sk, skb); |
5d424d5a6 [TCP]: MTU probing |
2015 |
} else { |
4de075e04 tcp: rename tcp_s... |
2016 |
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & |
a3433f35a tcp: unify tcp fl... |
2017 |
~(TCPHDR_FIN|TCPHDR_PSH); |
5d424d5a6 [TCP]: MTU probing |
2018 2019 |
if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); |
84fa7933a [NET]: Replace CH... |
2020 |
if (skb->ip_summed != CHECKSUM_PARTIAL) |
056834d9f [TCP]: cleanup tc... |
2021 2022 |
skb->csum = csum_partial(skb->data, skb->len, 0); |
5d424d5a6 [TCP]: MTU probing |
2023 2024 |
} else { __pskb_trim_head(skb, copy); |
5bbb432c8 tcp: tcp_set_skb_... |
2025 |
tcp_set_skb_tso_segs(skb, mss_now); |
5d424d5a6 [TCP]: MTU probing |
2026 2027 2028 2029 2030 |
} TCP_SKB_CB(skb)->seq += copy; } len += copy; |
234b68607 [TCP]: Add tcp_fo... |
2031 2032 2033 |
if (len >= probe_size) break; |
5d424d5a6 [TCP]: MTU probing |
2034 |
} |
5bbb432c8 tcp: tcp_set_skb_... |
2035 |
tcp_init_tso_segs(nskb, nskb->len); |
5d424d5a6 [TCP]: MTU probing |
2036 2037 |
/* We're ready to send. If this fails, the probe will |
7faee5c0d tcp: remove TCP_S... |
2038 2039 |
* be resegmented into mss-sized pieces by tcp_write_xmit(). */ |
5d424d5a6 [TCP]: MTU probing |
2040 2041 |
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending |
056834d9f [TCP]: cleanup tc... |
2042 |
* effectively two packets. */ |
5d424d5a6 [TCP]: MTU probing |
2043 |
tp->snd_cwnd--; |
66f5fe624 [TCP]: Rename upd... |
2044 |
tcp_event_new_data_sent(sk, nskb); |
5d424d5a6 [TCP]: MTU probing |
2045 2046 |
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); |
0e7b13685 [TCP] mtu probing... |
2047 2048 |
tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; tp->mtu_probe.probe_seq_end = TCP_SKB_CB(nskb)->end_seq; |
5d424d5a6 [TCP]: MTU probing |
2049 2050 2051 2052 2053 2054 |
return 1; } return -1; } |
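/* A loose sketch of the probe-size binary search above: each probe
 * tries the MSS corresponding to the midpoint of [search_low,
 * search_high] (both MTU values), and the range shrinks as probes
 * succeed or fail. The 40-byte header assumption matches the plain
 * IPv4 sketch earlier; it is illustrative, not the kernel computation.
 */
#include <stdio.h>

#define HDRS 40				/* IPv4 + TCP base headers */

static int probe_mss(int search_low, int search_high)
{
	return (search_high + search_low) / 2 - HDRS;	/* mtu_to_mss(midpoint) */
}

int main(void)
{
	/* search range 1024..1500: the first probe carries an MSS of 1222 */
	printf("probe mss = %d\n", probe_mss(1024, 1500));
	return 0;
}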
218af599f tcp: internal imp... |
2055 2056 2057 2058 2059 |
static bool tcp_pacing_check(const struct sock *sk) { return tcp_needs_internal_pacing(sk) && hrtimer_active(&tcp_sk(sk)->pacing_timer); } |
f9616c35a tcp: implement TS... |
2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 |
/* TCP Small Queues : * Control number of packets in qdisc/devices to two packets / or ~1 ms. * (These limits are doubled for retransmits) * This allows for : * - better RTT estimation and ACK scheduling * - faster recovery * - high rates * Alas, some drivers / subsystems require a fair amount * of queued bytes to ensure line rate. * One example is wifi aggregation (802.11 AMPDU) */ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, unsigned int factor) { unsigned int limit; limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit <<= factor; |
14afee4b6 net: convert sock... |
2079 |
if (refcount_read(&sk->sk_wmem_alloc) > limit) { |
75eefc6c5 tcp: tsq: add a s... |
2080 2081 2082 2083 2084 2085 2086 2087 |
/* Always send the 1st or 2nd skb in write queue. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. * This helps when TX completions are delayed too much. */ if (skb == sk->sk_write_queue.next || skb->prev == sk->sk_write_queue.next) return false; |
7aa5470c2 tcp: tsq: move ts... |
2088 |
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); |
f9616c35a tcp: implement TS... |
2089 2090 2091 2092 2093 |
/* It is possible TX completion already happened * before we set TSQ_THROTTLED, so we must * test again the condition. */ smp_mb__after_atomic(); |
14afee4b6 net: convert sock... |
2094 |
if (refcount_read(&sk->sk_wmem_alloc) > limit) |
f9616c35a tcp: implement TS... |
2095 2096 2097 2098 |
return true; } return false; } |
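/* A sketch of the TSQ limit above: allow roughly two packets or ~1 ms
 * of bytes at the pacing rate in the qdisc/device queues, doubled for
 * retransmits via `factor`. The sysctl cap is passed as a parameter.
 */
#include <stdio.h>

static unsigned int tsq_limit(unsigned int truesize,
			      unsigned long pacing_rate,
			      unsigned int sysctl_cap, unsigned int factor)
{
	unsigned long limit = 2UL * truesize;		/* at least two skbs */

	if ((pacing_rate >> 10) > limit)
		limit = pacing_rate >> 10;		/* ~1 ms at pacing rate */
	if (limit > sysctl_cap)
		limit = sysctl_cap;
	return (unsigned int)(limit << factor);
}

int main(void)
{
	/* 64 KB skbs at ~1 Gbit/s pacing, default 256 KB cap: limit 131072 */
	printf("limit = %u\n", tsq_limit(65536, 125000000UL, 262144, 0));
	return 0;
}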
05b055e89 tcp: instrument t... |
2099 2100 |
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { |
628174ccc tcp: uses jiffies... |
2101 |
const u32 now = tcp_jiffies32; |
efe967cde tcp: avoid bogus ... |
2102 |
enum tcp_chrono old = tp->chrono_type; |
05b055e89 tcp: instrument t... |
2103 |
|
efe967cde tcp: avoid bogus ... |
2104 2105 |
if (old > TCP_CHRONO_UNSPEC) tp->chrono_stat[old - 1] += now - tp->chrono_start; |
05b055e89 tcp: instrument t... |
2106 2107 2108 2109 2110 2111 2112 2113 2114 |
tp->chrono_start = now; tp->chrono_type = new; } void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); /* If there are multiple conditions worthy of tracking in a |
0f87230d1 tcp: instrument h... |
2115 2116 |
* chronograph then the highest priority enum takes precedence * over the other conditions. So that if something "more interesting" |
05b055e89 tcp: instrument t... |
2117 2118 2119 2120 2121 2122 2123 2124 2125 |
* starts happening, stop the previous chrono and start a new one. */ if (type > tp->chrono_type) tcp_chrono_set(tp, type); } void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); |
0f87230d1 tcp: instrument h... |
2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 |
/* There are multiple conditions worthy of tracking in a * chronograph, so that the highest priority enum takes * precedence over the other conditions (see tcp_chrono_start). * If a condition stops, we only stop chrono tracking if * it's the "most interesting" or current chrono we are * tracking and starts busy chrono if we have pending data. */ if (tcp_write_queue_empty(sk)) tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); else if (type == tp->chrono_type) tcp_chrono_set(tp, TCP_CHRONO_BUSY); |
05b055e89 tcp: instrument t... |
2138 |
} |
1da177e4c Linux-2.6.12-rc2 |
2139 2140 2141 2142 |
/* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. * |
f8269a495 tcp: make urg+gso... |
2143 2144 2145 2146 |
* LARGESEND note: !tcp_urg_mode is overkill, only frames between * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * |
6ba8a3b19 tcp: Tail loss pr... |
2147 2148 |
* Send at most one packet when push_one > 0. Temporarily ignore * cwnd limit to force at most one packet out when push_one == 2. |
a2a385d62 tcp: bool convers... |
2149 2150 |
* Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. |
1da177e4c Linux-2.6.12-rc2 |
2151 |
*/ |
a2a385d62 tcp: bool convers... |
2152 2153 |
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp) |
1da177e4c Linux-2.6.12-rc2 |
2154 2155 |
{ struct tcp_sock *tp = tcp_sk(sk); |
92df7b518 [TCP]: tcp_write_... |
2156 |
struct sk_buff *skb; |
c1b4a7e69 [TCP]: Move to ne... |
2157 2158 |
unsigned int tso_segs, sent_pkts; int cwnd_quota; |
5d424d5a6 [TCP]: MTU probing |
2159 |
int result; |
5615f8861 tcp: instrument h... |
2160 |
bool is_cwnd_limited = false, is_rwnd_limited = false; |
605ad7f18 tcp: refine TSO a... |
2161 |
u32 max_segs; |
1da177e4c Linux-2.6.12-rc2 |
2162 |
|
92df7b518 [TCP]: tcp_write_... |
2163 |
sent_pkts = 0; |
5d424d5a6 [TCP]: MTU probing |
2164 |
|
ee1836aec tcp: refresh tp t... |
2165 |
tcp_mstamp_refresh(tp); |
d5dd9175b tcp: use tcp_writ... |
2166 2167 2168 2169 |
if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); if (!result) { |
a2a385d62 tcp: bool convers... |
2170 |
return false; |
d5dd9175b tcp: use tcp_writ... |
2171 2172 2173 |
} else if (result > 0) { sent_pkts = 1; } |
5d424d5a6 [TCP]: MTU probing |
2174 |
} |
ed6e7268b tcp: allow conges... |
2175 |
max_segs = tcp_tso_segs(sk, mss_now); |
fe067e8ab [TCP]: Abstract o... |
2176 |
while ((skb = tcp_send_head(sk))) { |
c8ac37746 [TCP]: Fix bug #5... |
2177 |
unsigned int limit; |
218af599f tcp: internal imp... |
2178 2179 |
if (tcp_pacing_check(sk)) break; |
5bbb432c8 tcp: tcp_set_skb_... |
2180 |
tso_segs = tcp_init_tso_segs(skb, mss_now); |
c1b4a7e69 [TCP]: Move to ne... |
2181 |
BUG_ON(!tso_segs); |
aa93466bd [TCP]: Eliminate ... |
2182 |
|
9d186cac7 tcp: don't use ti... |
2183 |
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { |
7faee5c0d tcp: remove TCP_S... |
2184 |
/* "skb_mstamp" is used as a start point for the retransmit timer */ |
385e20706 tcp: use tp->tcp_... |
2185 |
skb->skb_mstamp = tp->tcp_mstamp; |
ec3423257 tcp: fix retransm... |
2186 |
goto repair; /* Skip network transmission */ |
9d186cac7 tcp: don't use ti... |
2187 |
} |
ec3423257 tcp: fix retransm... |
2188 |
|
b68e9f857 [PATCH] tcp: fix ... |
2189 |
cwnd_quota = tcp_cwnd_test(tp, skb); |
6ba8a3b19 tcp: Tail loss pr... |
2190 2191 2192 2193 2194 2195 2196 |
if (!cwnd_quota) { if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; else break; } |
b68e9f857 [PATCH] tcp: fix ... |
2197 |
|
5615f8861 tcp: instrument h... |
2198 2199 |
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) { is_rwnd_limited = true; |
b68e9f857 [PATCH] tcp: fix ... |
2200 |
break; |
5615f8861 tcp: instrument h... |
2201 |
} |
b68e9f857 [PATCH] tcp: fix ... |
2202 |
|
d6a4e26af tcp: tcp_tso_auto... |
2203 |
if (tso_segs == 1) { |
c1b4a7e69 [TCP]: Move to ne... |
2204 2205 2206 2207 2208 |
if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) break; } else { |
ca8a22634 tcp: make cwnd-li... |
2209 |
if (!push_one && |
605ad7f18 tcp: refine TSO a... |
2210 2211 |
tcp_tso_should_defer(sk, skb, &is_cwnd_limited, max_segs)) |
c1b4a7e69 [TCP]: Move to ne... |
2212 2213 |
break; } |
aa93466bd [TCP]: Eliminate ... |
2214 |
|
605ad7f18 tcp: refine TSO a... |
2215 |
limit = mss_now; |
d6a4e26af tcp: tcp_tso_auto... |
2216 |
if (tso_segs > 1 && !tcp_urg_mode(tp)) |
605ad7f18 tcp: refine TSO a... |
2217 2218 2219 2220 2221 2222 2223 2224 2225 |
limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, max_segs), nonagle); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; |
7aa5470c2 tcp: tsq: move ts... |
2226 2227 |
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags); |
f9616c35a tcp: implement TS... |
2228 2229 |
if (tcp_small_queue_check(sk, skb, 0)) break; |
c9eeec26e tcp: TSQ can use ... |
2230 |
|
d5dd9175b tcp: use tcp_writ... |
2231 |
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) |
92df7b518 [TCP]: tcp_write_... |
2232 |
break; |
1da177e4c Linux-2.6.12-rc2 |
2233 |
|
ec3423257 tcp: fix retransm... |
2234 |
repair: |
92df7b518 [TCP]: tcp_write_... |
2235 2236 2237 |
/* Advance the send_head. This one is sent out. * This call will increment packets_out. */ |
66f5fe624 [TCP]: Rename upd... |
2238 |
tcp_event_new_data_sent(sk, skb); |
1da177e4c Linux-2.6.12-rc2 |
2239 |
|
92df7b518 [TCP]: tcp_write_... |
2240 |
tcp_minshall_update(tp, mss_now, skb); |
a262f0cdf Proportional Rate... |
2241 |
sent_pkts += tcp_skb_pcount(skb); |
d5dd9175b tcp: use tcp_writ... |
2242 2243 2244 |
if (push_one) break; |
92df7b518 [TCP]: tcp_write_... |
2245 |
} |
1da177e4c Linux-2.6.12-rc2 |
2246 |
|
5615f8861 tcp: instrument h... |
2247 2248 2249 2250 |
if (is_rwnd_limited) tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED); else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); |
aa93466bd [TCP]: Eliminate ... |
2251 |
if (likely(sent_pkts)) { |
684bad110 tcp: use PRR to r... |
2252 2253 |
if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; |
6ba8a3b19 tcp: Tail loss pr... |
2254 2255 2256 |
/* Send one loss probe per tail loss episode. */ if (push_one != 2) |
241eb29c0 tcp: when schedul... |
2257 |
tcp_schedule_loss_probe(sk, false); |
d2e1339f4 tcp: Fix CWV bein... |
2258 |
is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); |
ca8a22634 tcp: make cwnd-li... |
2259 |
tcp_cwnd_validate(sk, is_cwnd_limited); |
a2a385d62 tcp: bool convers... |
2260 |
return false; |
1da177e4c Linux-2.6.12-rc2 |
2261 |
} |
b340b2645 tcp: TLP retransm... |
2262 |
return !tp->packets_out && tcp_send_head(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2263 |
} |
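As an aside on the loop above: the limit computed via tcp_mss_split_point() caps each transmission at min(cwnd_quota, max_segs) full-sized segments, and tso_fragment() splits anything larger. A minimal userspace sketch of that clamp (simplified; it ignores the Nagle and window-edge handling the real helper also does):

#include <stdio.h>

/* Simplified split-point clamp: send at most min(quota, max_segs)
 * full-sized segments from one skb in a single pass. */
static unsigned int split_limit(unsigned int skb_len, unsigned int mss_now,
				unsigned int cwnd_quota, unsigned int max_segs)
{
	unsigned int segs = cwnd_quota < max_segs ? cwnd_quota : max_segs;
	unsigned int limit = segs * mss_now;

	return skb_len < limit ? skb_len : limit;
}

int main(void)
{
	/* 64 KB skb, 1460-byte MSS, quota of 10 segments -> 14600 bytes now */
	printf("%u\n", split_limit(65536, 1460, 10, 45));
	return 0;
}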
241eb29c0 tcp: when schedul... |
2264 |
bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) |
6ba8a3b19 tcp: Tail loss pr... |
2265 2266 2267 |
{ struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); |
a2815817f tcp: enable xmit ... |
2268 |
u32 timeout, rto_delta_us; |
6ba8a3b19 tcp: Tail loss pr... |
2269 |
|
6ba8a3b19 tcp: Tail loss pr... |
2270 2271 2272 |
/* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ |
f9b995822 tcp: send loss pr... |
2273 |
if (tp->fastopen_rsk) |
6ba8a3b19 tcp: Tail loss pr... |
2274 |
return false; |
6ba8a3b19 tcp: Tail loss pr... |
2275 2276 2277 |
/* Schedule a loss probe in 2*RTT for SACK-capable connections * in Open state that are limited by either cwnd or the application. */ |
bec41a11d tcp: remove early... |
2278 2279 2280 |
if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || !tp->packets_out || !tcp_is_sack(tp) || icsk->icsk_ca_state != TCP_CA_Open) |
6ba8a3b19 tcp: Tail loss pr... |
2281 2282 2283 2284 2285 |
return false; if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && tcp_send_head(sk)) return false; |
bb4d991a2 tcp: adjust tail ... |
2286 |
/* Probe timeout is 2*rtt. Add minimum RTO to account |
f9b995822 tcp: send loss pr... |
2287 2288 |
* for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. |
6ba8a3b19 tcp: Tail loss pr... |
2289 |
*/ |
bb4d991a2 tcp: adjust tail ... |
2290 2291 2292 2293 2294 2295 2296 2297 2298 |
if (tp->srtt_us) { timeout = usecs_to_jiffies(tp->srtt_us >> 2); if (tp->packets_out == 1) timeout += TCP_RTO_MIN; else timeout += TCP_TIMEOUT_MIN; } else { timeout = TCP_TIMEOUT_INIT; } |
6ba8a3b19 tcp: Tail loss pr... |
2299 |
|
a2815817f tcp: enable xmit ... |
2300 |
/* If the RTO formula yields an earlier time, then use that time. */ |
241eb29c0 tcp: when schedul... |
2301 2302 2303 |
rto_delta_us = advancing_rto ? jiffies_to_usecs(inet_csk(sk)->icsk_rto) : tcp_rto_delta_us(sk); /* How far in future is RTO? */ |
a2815817f tcp: enable xmit ... |
2304 2305 |
if (rto_delta_us > 0) timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); |
6ba8a3b19 tcp: Tail loss pr... |
2306 2307 2308 2309 2310 |
inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); return true; } |
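A worked sketch of the probe-timeout arithmetic above. tp->srtt_us holds eight times the smoothed RTT, so srtt_us >> 2 yields 2*RTT; the millisecond constants below are stand-ins for the jiffies-based TCP_RTO_MIN, TCP_TIMEOUT_MIN and TCP_TIMEOUT_INIT (assumed values, not the kernel's definitions):

#include <stdio.h>

#define RTO_MIN_MS      200  /* stand-in for TCP_RTO_MIN */
#define TIMEOUT_MIN_MS    2  /* stand-in for TCP_TIMEOUT_MIN */
#define TIMEOUT_INIT_MS 1000 /* stand-in for TCP_TIMEOUT_INIT */

static unsigned int tlp_timeout_ms(unsigned int srtt_us, unsigned int packets_out)
{
	unsigned int timeout;

	if (!srtt_us)
		return TIMEOUT_INIT_MS;           /* no RTT sample yet */
	timeout = (srtt_us >> 2) / 1000;          /* 8*RTT >> 2 == 2*RTT */
	/* With one packet in flight, also wait out a delayed ACK. */
	timeout += (packets_out == 1) ? RTO_MIN_MS : TIMEOUT_MIN_MS;
	return timeout;
}

int main(void)
{
	printf("%u\n", tlp_timeout_ms(50000 * 8, 1)); /* 100ms + 200ms = 300 */
	printf("%u\n", tlp_timeout_ms(50000 * 8, 4)); /* 100ms + 2ms   = 102 */
	return 0;
}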
1f3279ae0 tcp: avoid retran... |
2311 2312 2313 |
/* Thanks to skb fast clones, we can detect if a prior transmit of * a packet is still in a qdisc or driver queue. * In this case, there is very little point in doing a retransmit! |
1f3279ae0 tcp: avoid retran... |
2314 2315 2316 2317 |
*/ static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { |
39bb5e628 net: skb_fclone_b... |
2318 |
if (unlikely(skb_fclone_busy(sk, skb))) { |
c10d9310e tcp: do not assum... |
2319 2320 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); |
1f3279ae0 tcp: avoid retran... |
2321 2322 2323 2324 |
return true; } return false; } |
b340b2645 tcp: TLP retransm... |
2325 |
/* When probe timeout (PTO) fires, try to send a new segment if possible, else |
6ba8a3b19 tcp: Tail loss pr... |
2326 2327 2328 2329 |
* retransmit the last segment. */ void tcp_send_loss_probe(struct sock *sk) { |
9b717a8d2 tcp: TLP loss det... |
2330 |
struct tcp_sock *tp = tcp_sk(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2331 2332 2333 |
struct sk_buff *skb; int pcount; int mss = tcp_current_mss(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2334 |
|
b340b2645 tcp: TLP retransm... |
2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 |
skb = tcp_send_head(sk); if (skb) { if (tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); if (tp->packets_out > pcount) goto probe_sent; goto rearm_timer; } skb = tcp_write_queue_prev(sk, skb); } else { skb = tcp_write_queue_tail(sk); |
6ba8a3b19 tcp: Tail loss pr... |
2347 |
} |
9b717a8d2 tcp: TLP loss det... |
2348 2349 2350 |
/* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2351 |
/* Retransmit last segment. */ |
6ba8a3b19 tcp: Tail loss pr... |
2352 2353 |
if (WARN_ON(!skb)) goto rearm_timer; |
1f3279ae0 tcp: avoid retran... |
2354 2355 |
if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2356 2357 2358 2359 2360 |
pcount = tcp_skb_pcount(skb); if (WARN_ON(!pcount)) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { |
6cc55e096 tcp: add gfp para... |
2361 2362 |
if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, GFP_ATOMIC))) |
6ba8a3b19 tcp: Tail loss pr... |
2363 |
goto rearm_timer; |
b340b2645 tcp: TLP retransm... |
2364 |
skb = tcp_write_queue_next(sk, skb); |
6ba8a3b19 tcp: Tail loss pr... |
2365 2366 2367 2368 |
} if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; |
10d3be569 tcp-tso: do not s... |
2369 |
if (__tcp_retransmit_skb(sk, skb, 1)) |
b340b2645 tcp: TLP retransm... |
2370 |
goto rearm_timer; |
6ba8a3b19 tcp: Tail loss pr... |
2371 |
|
9b717a8d2 tcp: TLP loss det... |
2372 |
/* Record snd_nxt for loss detection. */ |
b340b2645 tcp: TLP retransm... |
2373 |
tp->tlp_high_seq = tp->snd_nxt; |
9b717a8d2 tcp: TLP loss det... |
2374 |
|
b340b2645 tcp: TLP retransm... |
2375 |
probe_sent: |
c10d9310e tcp: do not assum... |
2376 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); |
b340b2645 tcp: TLP retransm... |
2377 2378 |
/* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; |
6ba8a3b19 tcp: Tail loss pr... |
2379 |
rearm_timer: |
fcd16c0a9 tcp: don't extend... |
2380 |
tcp_rearm_rto(sk); |
1da177e4c Linux-2.6.12-rc2 |
2381 |
} |
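The tcp_fragment() call above keeps the probe to a single MSS: a multi-segment tail skb is split at (pcount - 1) * mss so only its last segment is resent. A sketch of that split point (hypothetical helper, for illustration only):

#include <stdio.h>

/* Offset at which a multi-segment tail skb is split so that only
 * its last segment is retransmitted as the TLP probe. */
static unsigned int tlp_split_offset(unsigned int skb_len, unsigned int mss)
{
	unsigned int pcount = (skb_len + mss - 1) / mss; /* segments in skb */

	return pcount > 1 ? (pcount - 1) * mss : 0;      /* 0: no split needed */
}

int main(void)
{
	printf("%u\n", tlp_split_offset(4000, 1460)); /* 3 segs -> split at 2920 */
	printf("%u\n", tlp_split_offset(1000, 1460)); /* 1 seg  -> no split */
	return 0;
}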
a762a9800 [TCP]: Kill extra... |
2382 2383 2384 2385 |
/* Push out any pending frames which were held back due to * TCP_CORK or an attempt at coalescing tiny packets. * The socket must be locked by the caller. */ |
9e412ba76 [TCP]: Sed magic ... |
2386 2387 |
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) |
a762a9800 [TCP]: Kill extra... |
2388 |
{ |
726e07a8a tcp: move some pa... |
2389 2390 2391 2392 2393 2394 |
/* If we are closed, the bytes will have to remain here. * In time, closedown will finish, we will empty the write queue, and * all will be happy. */ if (unlikely(sk->sk_state == TCP_CLOSE)) return; |
99a1dec70 net: introduce sk... |
2395 |
if (tcp_write_xmit(sk, cur_mss, nonagle, 0, |
7450aaf61 tcp: suppress too... |
2396 |
sk_gfp_mask(sk, GFP_ATOMIC))) |
726e07a8a tcp: move some pa... |
2397 |
tcp_check_probe_timer(sk); |
a762a9800 [TCP]: Kill extra... |
2398 |
} |
c1b4a7e69 [TCP]: Move to ne... |
2399 2400 2401 2402 2403 |
/* Send the _single_ skb sitting at the send head. This function requires * a true push of pending frames to set up the probe timer etc. */ void tcp_push_one(struct sock *sk, unsigned int mss_now) { |
fe067e8ab [TCP]: Abstract o... |
2404 |
struct sk_buff *skb = tcp_send_head(sk); |
c1b4a7e69 [TCP]: Move to ne... |
2405 2406 |
BUG_ON(!skb || skb->len < mss_now); |
d5dd9175b tcp: use tcp_writ... |
2407 |
tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation); |
c1b4a7e69 [TCP]: Move to ne... |
2408 |
} |
1da177e4c Linux-2.6.12-rc2 |
2409 2410 |
/* This function returns the amount that we can raise the * usable window based on the following constraints |
e905a9eda [NET] IPV4: Fix w... |
2411 |
* |
1da177e4c Linux-2.6.12-rc2 |
2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 |
* 1. The window can never be shrunk once it is offered (RFC 793) * 2. We limit memory per socket * * RFC 1122: * "the suggested [SWS] avoidance algorithm for the receiver is to keep * RECV.NEXT + RCV.WIN fixed until: * RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)" * * i.e. don't raise the right edge of the window until you can raise * it at least MSS bytes. * * Unfortunately, the recommended algorithm breaks header prediction, * since header prediction assumes th->window stays fixed. * * Strictly speaking, keeping th->window fixed violates the receiver * side SWS prevention criteria. The problem is that under this rule * a stream of single byte packets will cause the right side of the * window to always advance by a single byte. |
e905a9eda [NET] IPV4: Fix w... |
2430 |
* |
1da177e4c Linux-2.6.12-rc2 |
2431 2432 |
* Of course, if the sender implements sender side SWS prevention * then this will not be a problem. |
e905a9eda [NET] IPV4: Fix w... |
2433 |
* |
1da177e4c Linux-2.6.12-rc2 |
2434 |
* BSD seems to make the following compromise: |
e905a9eda [NET] IPV4: Fix w... |
2435 |
* |
1da177e4c Linux-2.6.12-rc2 |
2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 |
* If the free space is less than the 1/4 of the maximum * space available and the free space is less than 1/2 mss, * then set the window to 0. * [ Actually, bsd uses MSS and 1/4 of maximal _window_ ] * Otherwise, just prevent the window from shrinking * and from being larger than the largest representable value. * * This prevents incremental opening of the window in the regime * where TCP is limited by the speed of the reader side taking * data out of the TCP receive queue. It does nothing about * those cases where the window is constrained on the sender side * because the pipeline is full. * * BSD also seems to "accidentally" limit itself to windows that are a * multiple of MSS, at least until the free space gets quite small. * This would appear to be a side effect of the mbuf implementation. * Combining these two algorithms results in the observed behavior * of having a fixed window size at almost all times. * * Below we obtain similar behavior by forcing the offered window to * a multiple of the mss when it is feasible to do so. * * Note, we don't "adjust" for TIMESTAMP or SACK option bytes. * Regular options like TIMESTAMP are taken into account. */ u32 __tcp_select_window(struct sock *sk) { |
463c84b97 [NET]: Introduce ... |
2463 |
struct inet_connection_sock *icsk = inet_csk(sk); |
1da177e4c Linux-2.6.12-rc2 |
2464 |
struct tcp_sock *tp = tcp_sk(sk); |
caa20d9ab [TCP]: spelling f... |
2465 |
/* MSS for the peer's data. Previous versions used mss_clamp |
1da177e4c Linux-2.6.12-rc2 |
2466 2467 2468 2469 2470 |
* here. I don't know if the value based on our guesses * of the peer's MSS is better for performance. It's more correct * but may be worse for performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ |
463c84b97 [NET]: Introduce ... |
2471 |
int mss = icsk->icsk_ack.rcv_mss; |
1da177e4c Linux-2.6.12-rc2 |
2472 |
int free_space = tcp_space(sk); |
86c1a0456 tcp: use zero-win... |
2473 2474 |
int allowed_space = tcp_full_space(sk); int full_space = min_t(int, tp->window_clamp, allowed_space); |
1da177e4c Linux-2.6.12-rc2 |
2475 |
int window; |
06425c308 tcp: fix 0 divide... |
2476 |
if (unlikely(mss > full_space)) { |
e905a9eda [NET] IPV4: Fix w... |
2477 |
mss = full_space; |
06425c308 tcp: fix 0 divide... |
2478 2479 2480 |
if (mss <= 0) return 0; } |
b92edbe0b [TCP] Avoid two d... |
2481 |
if (free_space < (full_space >> 1)) { |
463c84b97 [NET]: Introduce ... |
2482 |
icsk->icsk_ack.quick = 0; |
1da177e4c Linux-2.6.12-rc2 |
2483 |
|
b8da51ebb tcp: introduce tc... |
2484 |
if (tcp_under_memory_pressure(sk)) |
056834d9f [TCP]: cleanup tc... |
2485 2486 |
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); |
1da177e4c Linux-2.6.12-rc2 |
2487 |
|
86c1a0456 tcp: use zero-win... |
2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 |
/* free_space might become our new window, make sure we don't * increase it due to wscale. */ free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); /* if free space is less than mss estimate, or is below 1/16th * of the maximum allowed, try to move to zero-window, else * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and * new incoming data is dropped due to memory limits. * With large window, mss test triggers way too late in order * to announce zero window in time before rmem limit kicks in. */ if (free_space < (allowed_space >> 4) || free_space < mss) |
1da177e4c Linux-2.6.12-rc2 |
2501 2502 2503 2504 2505 2506 2507 2508 2509 |
return 0; } if (free_space > tp->rcv_ssthresh) free_space = tp->rcv_ssthresh; /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ |
1da177e4c Linux-2.6.12-rc2 |
2510 2511 2512 2513 2514 2515 2516 |
if (tp->rx_opt.rcv_wscale) { window = free_space; /* Advertise enough space so that it won't get scaled away. * Important case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ |
1935299d9 net: tcp: Refine ... |
2517 |
window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); |
1da177e4c Linux-2.6.12-rc2 |
2518 |
} else { |
1935299d9 net: tcp: Refine ... |
2519 |
window = tp->rcv_wnd; |
1da177e4c Linux-2.6.12-rc2 |
2520 2521 2522 2523 2524 2525 2526 2527 2528 |
/* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the * free space we just keep it. This prevents the divide * and multiply from happening most of the time. * We also don't do any window rounding when the free space * is too small. */ if (window <= free_space - mss || window > free_space) |
1935299d9 net: tcp: Refine ... |
2529 |
window = rounddown(free_space, mss); |
84565070e [TCP]: Do receive... |
2530 |
else if (mss == full_space && |
b92edbe0b [TCP] Avoid two d... |
2531 |
free_space > window + (full_space >> 1)) |
84565070e [TCP]: Do receive... |
2532 |
window = free_space; |
1da177e4c Linux-2.6.12-rc2 |
2533 2534 2535 2536 |
} return window; } |
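To make the unscaled-window rounding above concrete, here is a compact sketch (values illustrative): keep the current window when it already sits within one MSS of free space, otherwise round free space down to an MSS multiple:

#include <stdio.h>

/* Unscaled-window rounding from __tcp_select_window(), simplified. */
static int select_window(int rcv_wnd, int free_space, int mss, int full_space)
{
	int window = rcv_wnd;

	if (window <= free_space - mss || window > free_space)
		window = (free_space / mss) * mss;  /* rounddown(free_space, mss) */
	else if (mss == full_space &&
		 free_space > window + (full_space >> 1))
		window = free_space;
	return window;
}

int main(void)
{
	printf("%d\n", select_window(14000, 14600, 1460, 65535)); /* kept: 14000 */
	printf("%d\n", select_window(20000, 14600, 1460, 65535)); /* rounded: 14600 */
	return 0;
}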
cfea5a688 tcp: Merge tx_fla... |
2537 2538 |
void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct sk_buff *next_skb) |
082ac2d51 tcp: Merge tx_fla... |
2539 |
{ |
0a2cf20c3 tcp: remove SKBTX... |
2540 2541 2542 |
if (unlikely(tcp_has_tx_tstamp(next_skb))) { const struct skb_shared_info *next_shinfo = skb_shinfo(next_skb); |
082ac2d51 tcp: Merge tx_fla... |
2543 |
struct skb_shared_info *shinfo = skb_shinfo(skb); |
0a2cf20c3 tcp: remove SKBTX... |
2544 |
shinfo->tx_flags |= next_shinfo->tx_flags & SKBTX_ANY_TSTAMP; |
082ac2d51 tcp: Merge tx_fla... |
2545 |
shinfo->tskey = next_shinfo->tskey; |
2de8023e7 tcp: Merge txstam... |
2546 2547 |
TCP_SKB_CB(skb)->txstamp_ack |= TCP_SKB_CB(next_skb)->txstamp_ack; |
082ac2d51 tcp: Merge tx_fla... |
2548 2549 |
} } |
4a17fc3ad tcp: collapse mor... |
2550 |
/* Collapses two adjacent SKBs during retransmission. */ |
f8071cde7 tcp: enhance tcp_... |
2551 |
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) |
1da177e4c Linux-2.6.12-rc2 |
2552 2553 |
{ struct tcp_sock *tp = tcp_sk(sk); |
fe067e8ab [TCP]: Abstract o... |
2554 |
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
058dc3342 [TCP]: reduce tcp... |
2555 |
int skb_size, next_skb_size; |
1da177e4c Linux-2.6.12-rc2 |
2556 |
|
058dc3342 [TCP]: reduce tcp... |
2557 2558 |
skb_size = skb->len; next_skb_size = next_skb->len; |
1da177e4c Linux-2.6.12-rc2 |
2559 |
|
058dc3342 [TCP]: reduce tcp... |
2560 |
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
a6963a6b3 [TCP]: Re-place h... |
2561 |
|
f8071cde7 tcp: enhance tcp_... |
2562 2563 2564 2565 2566 2567 2568 |
if (next_skb_size) { if (next_skb_size <= skb_availroom(skb)) skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size), next_skb_size); else if (!skb_shift(skb, next_skb, next_skb_size)) return false; } |
2b7cda9c3 tcp: fix tcp_mtu_... |
2569 |
tcp_highest_sack_replace(sk, next_skb, skb); |
1da177e4c Linux-2.6.12-rc2 |
2570 |
|
058dc3342 [TCP]: reduce tcp... |
2571 |
tcp_unlink_write_queue(next_skb, sk); |
1da177e4c Linux-2.6.12-rc2 |
2572 |
|
058dc3342 [TCP]: reduce tcp... |
2573 2574 |
if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; |
1da177e4c Linux-2.6.12-rc2 |
2575 |
|
058dc3342 [TCP]: reduce tcp... |
2576 2577 |
if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); |
1da177e4c Linux-2.6.12-rc2 |
2578 |
|
058dc3342 [TCP]: reduce tcp... |
2579 2580 |
/* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; |
1da177e4c Linux-2.6.12-rc2 |
2581 |
|
e6c7d0857 tcp: drop unneces... |
2582 |
/* Merge over control information. This moves PSH/FIN etc. over */ |
4de075e04 tcp: rename tcp_s... |
2583 |
TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; |
058dc3342 [TCP]: reduce tcp... |
2584 2585 2586 2587 2588 |
/* All done, get rid of second SKB and account for it so * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; |
a643b5d41 tcp: Handle eor b... |
2589 |
TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; |
058dc3342 [TCP]: reduce tcp... |
2590 2591 |
/* changed transmit queue under us so clear hints */ |
ef9da47c7 tcp: don't clear ... |
2592 2593 2594 |
tcp_clear_retrans_hints_partial(tp); if (next_skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = skb; |
058dc3342 [TCP]: reduce tcp... |
2595 |
|
797108d13 tcp: add helper f... |
2596 |
tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); |
082ac2d51 tcp: Merge tx_fla... |
2597 |
tcp_skb_collapse_tstamp(skb, next_skb); |
058dc3342 [TCP]: reduce tcp... |
2598 |
sk_wmem_free_skb(sk, next_skb); |
f8071cde7 tcp: enhance tcp_... |
2599 |
return true; |
1da177e4c Linux-2.6.12-rc2 |
2600 |
} |
67edfef78 TCP: Add comments... |
2601 |
/* Check if coalescing SKBs is legal. */ |
a2a385d62 tcp: bool convers... |
2602 |
static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) |
4a17fc3ad tcp: collapse mor... |
2603 2604 |
{ if (tcp_skb_pcount(skb) > 1) |
a2a385d62 tcp: bool convers... |
2605 |
return false; |
4a17fc3ad tcp: collapse mor... |
2606 |
if (skb_cloned(skb)) |
a2a385d62 tcp: bool convers... |
2607 |
return false; |
4a17fc3ad tcp: collapse mor... |
2608 |
if (skb == tcp_send_head(sk)) |
a2a385d62 tcp: bool convers... |
2609 |
return false; |
2331ccc5b tcp: enhance tcp ... |
2610 |
/* Some heuristics for collapsing over SACK'd data could be invented */ |
4a17fc3ad tcp: collapse mor... |
2611 |
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) |
a2a385d62 tcp: bool convers... |
2612 |
return false; |
4a17fc3ad tcp: collapse mor... |
2613 |
|
a2a385d62 tcp: bool convers... |
2614 |
return true; |
4a17fc3ad tcp: collapse mor... |
2615 |
} |
67edfef78 TCP: Add comments... |
2616 2617 2618 |
/* Collapse packets in the retransmit queue to create * fewer packets on the wire. This is only done on retransmission. */ |
4a17fc3ad tcp: collapse mor... |
2619 2620 2621 2622 2623 |
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, int space) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = to, *tmp; |
a2a385d62 tcp: bool convers... |
2624 |
bool first = true; |
4a17fc3ad tcp: collapse mor... |
2625 2626 2627 |
if (!sysctl_tcp_retrans_collapse) return; |
4de075e04 tcp: rename tcp_s... |
2628 |
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) |
4a17fc3ad tcp: collapse mor... |
2629 2630 2631 2632 2633 |
return; tcp_for_write_queue_from_safe(skb, tmp, sk) { if (!tcp_can_collapse(sk, skb)) break; |
a643b5d41 tcp: Handle eor b... |
2634 2635 |
if (!tcp_skb_can_collapse_to(to)) break; |
4a17fc3ad tcp: collapse mor... |
2636 2637 2638 |
space -= skb->len; if (first) { |
a2a385d62 tcp: bool convers... |
2639 |
first = false; |
4a17fc3ad tcp: collapse mor... |
2640 2641 2642 2643 2644 |
continue; } if (space < 0) break; |
4a17fc3ad tcp: collapse mor... |
2645 2646 2647 |
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) break; |
f8071cde7 tcp: enhance tcp_... |
2648 2649 |
if (!tcp_collapse_retrans(sk, to)) break; |
4a17fc3ad tcp: collapse mor... |
2650 2651 |
} } |
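The loop above merges followers into the skb being retransmitted while they fit in a budget of one current MSS; the first iteration only seeds the budget. A sketch of that accounting (simplified: the real loop also stops at the window edge and on skbs that fail tcp_can_collapse()):

#include <stdio.h>

/* How many trailing skbs would be merged into the head, given a
 * budget of 'space' (the current MSS) and each candidate's length.
 * The head skb itself only consumes budget; it is never merged. */
static int collapse_count(const int *len, int n, int space)
{
	int i, merged = 0;

	for (i = 0; i < n; i++) {
		space -= len[i];
		if (i == 0)
			continue;      /* head skb: seed the budget */
		if (space < 0)
			break;
		merged++;
	}
	return merged;
}

int main(void)
{
	int len[] = { 600, 300, 400, 500 };

	/* budget 1460: 300 and 400 fit after the 600-byte head, 500 does not */
	printf("%d\n", collapse_count(len, 4, 1460));
	return 0;
}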
1da177e4c Linux-2.6.12-rc2 |
2652 2653 2654 2655 |
/* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. */ |
10d3be569 tcp-tso: do not s... |
2656 |
int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
1da177e4c Linux-2.6.12-rc2 |
2657 |
{ |
5d424d5a6 [TCP]: MTU probing |
2658 |
struct inet_connection_sock *icsk = inet_csk(sk); |
10d3be569 tcp-tso: do not s... |
2659 |
struct tcp_sock *tp = tcp_sk(sk); |
7d227cd23 tcp: TCP connecti... |
2660 |
unsigned int cur_mss; |
10d3be569 tcp-tso: do not s... |
2661 |
int diff, len, err; |
1da177e4c Linux-2.6.12-rc2 |
2662 |
|
10d3be569 tcp-tso: do not s... |
2663 2664 |
/* Inconclusive MTU probe */ if (icsk->icsk_mtup.probe_size) |
5d424d5a6 [TCP]: MTU probing |
2665 |
icsk->icsk_mtup.probe_size = 0; |
5d424d5a6 [TCP]: MTU probing |
2666 |
|
1da177e4c Linux-2.6.12-rc2 |
2667 |
/* Do not send more than we queued. 1/4 is reserved for possible |
caa20d9ab [TCP]: spelling f... |
2668 |
* copying overhead: fragmentation, tunneling, mangling etc. |
1da177e4c Linux-2.6.12-rc2 |
2669 |
*/ |
14afee4b6 net: convert sock... |
2670 |
if (refcount_read(&sk->sk_wmem_alloc) > |
ffb4d6c85 tcp: fix overflow... |
2671 2672 |
min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) |
1da177e4c Linux-2.6.12-rc2 |
2673 |
return -EAGAIN; |
1f3279ae0 tcp: avoid retran... |
2674 2675 |
if (skb_still_in_host_queue(sk, skb)) return -EBUSY; |
1da177e4c Linux-2.6.12-rc2 |
2676 |
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { |
c89d53430 tcp: purge write ... |
2677 2678 2679 2680 |
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { WARN_ON_ONCE(1); return -EINVAL; } |
1da177e4c Linux-2.6.12-rc2 |
2681 2682 2683 |
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } |
7d227cd23 tcp: TCP connecti... |
2684 2685 |
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ |
0c54b85f2 tcp: simplify tcp... |
2686 |
cur_mss = tcp_current_mss(sk); |
7d227cd23 tcp: TCP connecti... |
2687 |
|
1da177e4c Linux-2.6.12-rc2 |
2688 2689 2690 2691 2692 |
/* If the receiver has shrunk its window, and skb is out of * the new window, do not retransmit it. The exception is the * case when the window is shrunk to zero, where * our retransmit serves as a zero window probe. */ |
9d4fb27db net/ipv4: Move &&... |
2693 2694 |
if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && TCP_SKB_CB(skb)->seq != tp->snd_una) |
1da177e4c Linux-2.6.12-rc2 |
2695 |
return -EAGAIN; |
10d3be569 tcp-tso: do not s... |
2696 2697 2698 |
len = cur_mss * segs; if (skb->len > len) { if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) |
1da177e4c Linux-2.6.12-rc2 |
2699 |
return -ENOMEM; /* We'll try again later. */ |
02276f3c9 tcp: fix corner c... |
2700 |
} else { |
10d3be569 tcp-tso: do not s... |
2701 2702 |
if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; |
9eb9362e5 tcp: miscounts du... |
2703 |
|
10d3be569 tcp-tso: do not s... |
2704 2705 2706 2707 2708 2709 2710 |
diff = tcp_skb_pcount(skb); tcp_set_skb_tso_segs(skb, cur_mss); diff -= tcp_skb_pcount(skb); if (diff) tcp_adjust_pcount(sk, skb, diff); if (skb->len < cur_mss) tcp_retrans_try_collapse(sk, skb, cur_mss); |
1da177e4c Linux-2.6.12-rc2 |
2711 |
} |
492135557 tcp: add rfc3168,... |
2712 2713 2714 |
/* RFC3168, section 6.1.1.1. ECN fallback */ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN) tcp_ecn_clear_syn(sk, skb); |
678550c65 tcp: include loca... |
2715 2716 2717 2718 2719 2720 |
/* Update global and local TCP statistics. */ segs = tcp_skb_pcount(skb); TCP_ADD_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS, segs); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); tp->total_retrans += segs; |
50bceae9b tcp: Reallocate h... |
2721 2722 2723 2724 2725 2726 |
/* make sure skb->data is aligned on arches that require it * and check if ack-trimming & collapsing extended the headroom * beyond what csum_start can cover. */ if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) || skb_headroom(skb) >= 0xFFFF)) { |
10a81980f tcp: refresh skb ... |
2727 |
struct sk_buff *nskb; |
10a81980f tcp: refresh skb ... |
2728 |
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); |
c84a57113 tcp: fix bogus RT... |
2729 2730 |
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; |
5889e2c0e tcp: call tcp_rat... |
2731 |
if (!err) { |
8c72c65b4 tcp: update skb->... |
2732 |
skb->skb_mstamp = tp->tcp_mstamp; |
5889e2c0e tcp: call tcp_rat... |
2733 2734 |
tcp_rate_skb_sent(sk, skb); } |
117632e64 tcp: take care of... |
2735 |
} else { |
c84a57113 tcp: fix bogus RT... |
2736 |
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
117632e64 tcp: take care of... |
2737 |
} |
c84a57113 tcp: fix bogus RT... |
2738 |
|
fc9f35010 tcp: increment re... |
2739 |
if (likely(!err)) { |
c84a57113 tcp: fix bogus RT... |
2740 |
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; |
678550c65 tcp: include loca... |
2741 2742 |
} else if (err != -EBUSY) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); |
fc9f35010 tcp: increment re... |
2743 |
} |
c84a57113 tcp: fix bogus RT... |
2744 |
return err; |
93b174ad7 tcp: bug fix Fast... |
2745 |
} |
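The -EAGAIN guard near the top of this function refuses the retransmit when allocated write memory already exceeds queued bytes plus 25% headroom, or the send buffer, whichever is smaller. A sketch of that check (logic only; field names borrowed for readability):

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the -EAGAIN guard: allow the retransmit only while
 * wmem_alloc stays within min(queued + queued/4, sndbuf). */
static bool retrans_mem_ok(unsigned int wmem_alloc,
			   unsigned int wmem_queued, unsigned int sndbuf)
{
	unsigned int limit = wmem_queued + (wmem_queued >> 2);

	if (sndbuf < limit)
		limit = sndbuf;
	return wmem_alloc <= limit;
}

int main(void)
{
	printf("%d\n", retrans_mem_ok(120000, 100000, 212992)); /* 1: under 125000 */
	printf("%d\n", retrans_mem_ok(130000, 100000, 212992)); /* 0: over 125000 */
	return 0;
}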
10d3be569 tcp-tso: do not s... |
2746 |
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) |
93b174ad7 tcp: bug fix Fast... |
2747 2748 |
{ struct tcp_sock *tp = tcp_sk(sk); |
10d3be569 tcp-tso: do not s... |
2749 |
int err = __tcp_retransmit_skb(sk, skb, segs); |
1da177e4c Linux-2.6.12-rc2 |
2750 2751 |
if (err == 0) { |
1da177e4c Linux-2.6.12-rc2 |
2752 |
#if FASTRETRANS_DEBUG > 0 |
056834d9f [TCP]: cleanup tc... |
2753 |
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { |
e87cc4728 net: Convert net_... |
2754 2755 |
net_dbg_ratelimited("retrans_out leaked "); |
1da177e4c Linux-2.6.12-rc2 |
2756 2757 2758 2759 2760 2761 2762 |
} #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) |
7faee5c0d tcp: remove TCP_S... |
2763 |
tp->retrans_stamp = tcp_skb_timestamp(skb); |
1da177e4c Linux-2.6.12-rc2 |
2764 |
|
1da177e4c Linux-2.6.12-rc2 |
2765 |
} |
6e08d5e3c tcp: fix false un... |
2766 2767 2768 2769 |
if (tp->undo_retrans < 0) tp->undo_retrans = 0; tp->undo_retrans += tcp_skb_pcount(skb); |
1da177e4c Linux-2.6.12-rc2 |
2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 |
return err; } /* This gets called after a retransmit timeout, and the initially * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. * If doing SACK, the first ACK which comes back for a timeout- * based retransmit packet might feed us FACK information again. * If so, we use it to avoid unnecessary retransmissions. */ void tcp_xmit_retransmit_queue(struct sock *sk) { |
6687e988d [ICSK]: Move TCP ... |
2783 |
const struct inet_connection_sock *icsk = inet_csk(sk); |
1da177e4c Linux-2.6.12-rc2 |
2784 2785 |
struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; |
0e1c54c2a tcp: reorganize r... |
2786 |
struct sk_buff *hole = NULL; |
840a3cbe8 tcp: remove forwa... |
2787 |
u32 max_segs; |
61eb55f4d tcp: Reorganize s... |
2788 |
int mib_idx; |
6a438bbe6 [TCP]: speed up S... |
2789 |
|
45e77d314 tcp: fix crash in... |
2790 2791 |
if (!tp->packets_out) return; |
618d9f255 tcp: back retrans... |
2792 |
if (tp->retransmit_skb_hint) { |
6a438bbe6 [TCP]: speed up S... |
2793 |
skb = tp->retransmit_skb_hint; |
618d9f255 tcp: back retrans... |
2794 |
} else { |
fe067e8ab [TCP]: Abstract o... |
2795 |
skb = tcp_write_queue_head(sk); |
618d9f255 tcp: back retrans... |
2796 |
} |
1da177e4c Linux-2.6.12-rc2 |
2797 |
|
ed6e7268b tcp: allow conges... |
2798 |
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); |
08ebd1721 tcp: remove tp->l... |
2799 |
tcp_for_write_queue_from(skb, sk) { |
dca0aaf84 tcp: defer sacked... |
2800 |
__u8 sacked; |
10d3be569 tcp-tso: do not s... |
2801 |
int segs; |
1da177e4c Linux-2.6.12-rc2 |
2802 |
|
08ebd1721 tcp: remove tp->l... |
2803 2804 |
if (skb == tcp_send_head(sk)) break; |
218af599f tcp: internal imp... |
2805 2806 2807 |
if (tcp_pacing_check(sk)) break; |
08ebd1721 tcp: remove tp->l... |
2808 |
/* we could do better than to assign each time */ |
51456b291 ipv4: coding styl... |
2809 |
if (!hole) |
0e1c54c2a tcp: reorganize r... |
2810 |
tp->retransmit_skb_hint = skb; |
08ebd1721 tcp: remove tp->l... |
2811 |
|
10d3be569 tcp-tso: do not s... |
2812 2813 |
segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) |
08ebd1721 tcp: remove tp->l... |
2814 |
return; |
dca0aaf84 tcp: defer sacked... |
2815 |
sacked = TCP_SKB_CB(skb)->sacked; |
a3d2e9f8e tcp: do not send ... |
2816 2817 2818 2819 |
/* In case tcp_shift_skb_data() has aggregated large skbs, * we need to make sure we are not sending too big TSO packets */ segs = min_t(int, segs, max_segs); |
1da177e4c Linux-2.6.12-rc2 |
2820 |
|
840a3cbe8 tcp: remove forwa... |
2821 2822 |
if (tp->retrans_out >= tp->lost_out) { break; |
0e1c54c2a tcp: reorganize r... |
2823 |
} else if (!(sacked & TCPCB_LOST)) { |
51456b291 ipv4: coding styl... |
2824 |
if (!hole && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) |
0e1c54c2a tcp: reorganize r... |
2825 2826 |
hole = skb; continue; |
1da177e4c Linux-2.6.12-rc2 |
2827 |
|
0e1c54c2a tcp: reorganize r... |
2828 2829 2830 2831 2832 2833 |
} else { if (icsk->icsk_ca_state != TCP_CA_Loss) mib_idx = LINUX_MIB_TCPFASTRETRANS; else mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; } |
1da177e4c Linux-2.6.12-rc2 |
2834 |
|
0e1c54c2a tcp: reorganize r... |
2835 |
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) |
1da177e4c Linux-2.6.12-rc2 |
2836 |
continue; |
f9616c35a tcp: implement TS... |
2837 2838 |
if (tcp_small_queue_check(sk, skb, 1)) return; |
10d3be569 tcp-tso: do not s... |
2839 |
if (tcp_retransmit_skb(sk, skb, segs)) |
0e1c54c2a tcp: reorganize r... |
2840 |
return; |
24ab6bec8 tcp: account all ... |
2841 |
|
de1d65781 tcp: fix under-ac... |
2842 |
NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); |
1da177e4c Linux-2.6.12-rc2 |
2843 |
|
684bad110 tcp: use PRR to r... |
2844 |
if (tcp_in_cwnd_reduction(sk)) |
a262f0cdf Proportional Rate... |
2845 |
tp->prr_out += tcp_skb_pcount(skb); |
57dde7f70 tcp: add reorderi... |
2846 2847 |
if (skb == tcp_write_queue_head(sk) && icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) |
3f421baa4 [NET]: Just move ... |
2848 2849 2850 |
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1da177e4c Linux-2.6.12-rc2 |
2851 2852 |
} } |
d83769a58 tcp: fix possible... |
2853 2854 |
/* We allow FIN packets to exceed memory limits to expedite * connection teardown and (memory) recovery. |
845704a53 tcp: avoid loopin... |
2855 2856 |
* Otherwise tcp_send_fin() could be tempted to either delay the FIN * or even be forced to close the flow without any FIN. |
a6c5ea4cc tcp: rename sk_fo... |
2857 2858 |
* In general, we want to allow one skb per socket to avoid hangs * with edge-triggered epoll() |
d83769a58 tcp: fix possible... |
2859 |
*/ |
a6c5ea4cc tcp: rename sk_fo... |
2860 |
void sk_forced_mem_schedule(struct sock *sk, int size) |
d83769a58 tcp: fix possible... |
2861 |
{ |
e805605c7 net: tcp_memcontr... |
2862 |
int amt; |
d83769a58 tcp: fix possible... |
2863 2864 2865 2866 2867 |
if (size <= sk->sk_forward_alloc) return; amt = sk_mem_pages(size); sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
e805605c7 net: tcp_memcontr... |
2868 |
sk_memory_allocated_add(sk, amt); |
baac50bbc net: tcp_memcontr... |
2869 2870 |
if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_charge_skmem(sk->sk_memcg, amt); |
d83769a58 tcp: fix possible... |
2871 |
} |
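sk_mem_pages() rounds the requested size up to whole SK_MEM_QUANTUM units before the forced charge above; a sketch with an assumed 4 KB quantum:

#include <stdio.h>

#define SK_MEM_QUANTUM 4096 /* assumption: one page */

/* Whole quanta needed to cover 'size' bytes (round up). */
static int mem_pages(int size)
{
	return (size + SK_MEM_QUANTUM - 1) / SK_MEM_QUANTUM;
}

int main(void)
{
	printf("%d\n", mem_pages(1));    /* 1 quantum */
	printf("%d\n", mem_pages(4097)); /* 2 quanta  */
	return 0;
}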
845704a53 tcp: avoid loopin... |
2872 2873 |
/* Send a FIN. The caller locks the socket for us. * We should try to send a FIN packet really hard, but eventually give up. |
1da177e4c Linux-2.6.12-rc2 |
2874 2875 2876 |
*/ void tcp_send_fin(struct sock *sk) { |
845704a53 tcp: avoid loopin... |
2877 |
struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); |
e905a9eda [NET] IPV4: Fix w... |
2878 |
struct tcp_sock *tp = tcp_sk(sk); |
e905a9eda [NET] IPV4: Fix w... |
2879 |
|
845704a53 tcp: avoid loopin... |
2880 2881 2882 2883 |
/* Optimization: tack on the FIN if we have one skb in the write queue and * this skb was not yet sent, or we are under memory pressure. * Note: in the latter case, the FIN packet will be sent after a timeout, * as the TCP stack thinks it has already been transmitted. |
1da177e4c Linux-2.6.12-rc2 |
2884 |
*/ |
b8da51ebb tcp: introduce tc... |
2885 |
if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { |
845704a53 tcp: avoid loopin... |
2886 2887 2888 |
coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; |
1da177e4c Linux-2.6.12-rc2 |
2889 |
tp->write_seq++; |
845704a53 tcp: avoid loopin... |
2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 |
if (!tcp_send_head(sk)) { /* This means tskb was already sent. * Pretend we included the FIN on the previous transmit. * We need to set tp->snd_nxt to the value it would have * if FIN had been sent. This is because the retransmit path * does not change tp->snd_nxt. */ tp->snd_nxt++; return; } |
1da177e4c Linux-2.6.12-rc2 |
2900 |
} else { |
845704a53 tcp: avoid loopin... |
2901 2902 2903 2904 2905 |
skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); if (unlikely(!skb)) { if (tskb) goto coalesce; return; |
1da177e4c Linux-2.6.12-rc2 |
2906 |
} |
d83769a58 tcp: fix possible... |
2907 |
skb_reserve(skb, MAX_TCP_HEADER); |
a6c5ea4cc tcp: rename sk_fo... |
2908 |
sk_forced_mem_schedule(sk, skb->truesize); |
1da177e4c Linux-2.6.12-rc2 |
2909 |
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ |
e870a8efc [TCP]: Perform se... |
2910 |
tcp_init_nondata_skb(skb, tp->write_seq, |
a3433f35a tcp: unify tcp fl... |
2911 |
TCPHDR_ACK | TCPHDR_FIN); |
1da177e4c Linux-2.6.12-rc2 |
2912 2913 |
tcp_queue_skb(sk, skb); } |
845704a53 tcp: avoid loopin... |
2914 |
__tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); |
1da177e4c Linux-2.6.12-rc2 |
2915 2916 2917 2918 2919 |
} /* We get here when a process closes a file descriptor (either due to * an explicit close() or as a byproduct of exit()'ing) and there * was unread data in the receive queue. This behavior is recommended |
65bb723c9 [TCP]: Update ref... |
2920 |
* by RFC 2525, section 2.17. -DaveM |
1da177e4c Linux-2.6.12-rc2 |
2921 |
*/ |
dd0fc66fb [PATCH] gfp flags... |
2922 |
void tcp_send_active_reset(struct sock *sk, gfp_t priority) |
1da177e4c Linux-2.6.12-rc2 |
2923 |
{ |
1da177e4c Linux-2.6.12-rc2 |
2924 |
struct sk_buff *skb; |
7cc2b043b net: tcp: Increas... |
2925 |
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); |
1da177e4c Linux-2.6.12-rc2 |
2926 2927 2928 |
/* NOTE: No TCP options attached and we never retransmit this. */ skb = alloc_skb(MAX_TCP_HEADER, priority); if (!skb) { |
4e6734447 mib: add net to N... |
2929 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
1da177e4c Linux-2.6.12-rc2 |
2930 2931 2932 2933 2934 |
return; } /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); |
e870a8efc [TCP]: Perform se... |
2935 |
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
a3433f35a tcp: unify tcp fl... |
2936 |
TCPHDR_ACK | TCPHDR_RST); |
9a568de48 tcp: switch TCP T... |
2937 |
tcp_mstamp_refresh(tcp_sk(sk)); |
1da177e4c Linux-2.6.12-rc2 |
2938 |
/* Send it off. */ |
dfb4b9dce [TCP] Vegas: time... |
2939 |
if (tcp_transmit_skb(sk, skb, 0, priority)) |
4e6734447 mib: add net to N... |
2940 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); |
1da177e4c Linux-2.6.12-rc2 |
2941 |
} |
67edfef78 TCP: Add comments... |
2942 2943 |
/* Send a crossed SYN-ACK during socket establishment. * WARNING: This routine must only be called when we have already sent |
1da177e4c Linux-2.6.12-rc2 |
2944 2945 2946 2947 2948 2949 |
* a SYN packet that crossed the incoming SYN that caused this routine * to get called. If this assumption fails then the initial rcv_wnd * and rcv_wscale values will not be correct. */ int tcp_send_synack(struct sock *sk) { |
056834d9f [TCP]: cleanup tc... |
2950 |
struct sk_buff *skb; |
1da177e4c Linux-2.6.12-rc2 |
2951 |
|
fe067e8ab [TCP]: Abstract o... |
2952 |
skb = tcp_write_queue_head(sk); |
51456b291 ipv4: coding styl... |
2953 |
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { |
91df42bed net: ipv4 and ipv... |
2954 2955 |
pr_debug("%s: wrong queue state ", __func__); |
1da177e4c Linux-2.6.12-rc2 |
2956 2957 |
return -EFAULT; } |
4de075e04 tcp: rename tcp_s... |
2958 |
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { |
1da177e4c Linux-2.6.12-rc2 |
2959 2960 |
if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); |
51456b291 ipv4: coding styl... |
2961 |
if (!nskb) |
1da177e4c Linux-2.6.12-rc2 |
2962 |
return -ENOMEM; |
fe067e8ab [TCP]: Abstract o... |
2963 |
tcp_unlink_write_queue(skb, sk); |
f4a775d14 net: introduce __... |
2964 |
__skb_header_release(nskb); |
fe067e8ab [TCP]: Abstract o... |
2965 |
__tcp_add_write_queue_head(sk, nskb); |
3ab224be6 [NET] CORE: Intro... |
2966 2967 2968 |
sk_wmem_free_skb(sk, skb); sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); |
1da177e4c Linux-2.6.12-rc2 |
2969 2970 |
skb = nskb; } |
4de075e04 tcp: rename tcp_s... |
2971 |
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; |
735d38311 tcp: change TCP_E... |
2972 |
tcp_ecn_send_synack(sk, skb); |
1da177e4c Linux-2.6.12-rc2 |
2973 |
} |
dfb4b9dce [TCP] Vegas: time... |
2974 |
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
1da177e4c Linux-2.6.12-rc2 |
2975 |
} |
4aea39c11 tcp: tcp_make_syn... |
2976 2977 2978 2979 2980 |
/** * tcp_make_synack - Prepare a SYN-ACK. * @sk: listener socket * @dst: dst entry attached to the SYNACK * @req: request_sock pointer |
4aea39c11 tcp: tcp_make_syn... |
2981 2982 2983 2984 |
* * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ |
5d062de7f tcp: constify tcp... |
2985 |
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, |
e6b4d1136 TCPCT part 1a: ad... |
2986 |
struct request_sock *req, |
ca6fb0651 tcp: attach SYNAC... |
2987 |
struct tcp_fastopen_cookie *foc, |
b3d051477 tcp: do not mess ... |
2988 |
enum tcp_synack_type synack_type) |
1da177e4c Linux-2.6.12-rc2 |
2989 |
{ |
2e6599cb8 [NET] Generalise ... |
2990 |
struct inet_request_sock *ireq = inet_rsk(req); |
5d062de7f tcp: constify tcp... |
2991 |
const struct tcp_sock *tp = tcp_sk(sk); |
80f03e27a tcp: md5: fix rcu... |
2992 |
struct tcp_md5sig_key *md5 = NULL; |
5d062de7f tcp: constify tcp... |
2993 2994 |
struct tcp_out_options opts; struct sk_buff *skb; |
bd0388ae7 TCPCT part 1f: In... |
2995 |
int tcp_header_size; |
5d062de7f tcp: constify tcp... |
2996 |
struct tcphdr *th; |
f5fff5dc8 tcp: advertise MS... |
2997 |
int mss; |
1da177e4c Linux-2.6.12-rc2 |
2998 |
|
ca6fb0651 tcp: attach SYNAC... |
2999 |
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); |
4aea39c11 tcp: tcp_make_syn... |
3000 3001 |
if (unlikely(!skb)) { dst_release(dst); |
1da177e4c Linux-2.6.12-rc2 |
3002 |
return NULL; |
4aea39c11 tcp: tcp_make_syn... |
3003 |
} |
1da177e4c Linux-2.6.12-rc2 |
3004 3005 |
/* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); |
b3d051477 tcp: do not mess ... |
3006 3007 |
switch (synack_type) { case TCP_SYNACK_NORMAL: |
9e17f8a47 net: make skb_set... |
3008 |
skb_set_owner_w(skb, req_to_sk(req)); |
b3d051477 tcp: do not mess ... |
3009 3010 3011 3012 3013 3014 3015 |
break; case TCP_SYNACK_COOKIE: /* Under synflood, we do not attach skb to a socket, * to avoid false sharing. */ break; case TCP_SYNACK_FASTOPEN: |
ca6fb0651 tcp: attach SYNAC... |
3016 3017 3018 3019 3020 |
/* sk is a const pointer, because we want to express that multiple * cpus might call us concurrently. * sk->sk_wmem_alloc is an atomic, so we can promote to rw. */ skb_set_owner_w(skb, (struct sock *)sk); |
b3d051477 tcp: do not mess ... |
3021 |
break; |
ca6fb0651 tcp: attach SYNAC... |
3022 |
} |
4aea39c11 tcp: tcp_make_syn... |
3023 |
skb_dst_set(skb, dst); |
1da177e4c Linux-2.6.12-rc2 |
3024 |
|
3541f9e8b tcp: add tcp_mss_... |
3025 |
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); |
f5fff5dc8 tcp: advertise MS... |
3026 |
|
33ad798c9 tcp: options clea... |
3027 |
memset(&opts, 0, sizeof(opts)); |
8b5f12d04 syncookies: fix i... |
3028 3029 |
#ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) |
9a568de48 tcp: switch TCP T... |
3030 |
skb->skb_mstamp = cookie_init_timestamp(req); |
8b5f12d04 syncookies: fix i... |
3031 3032 |
else #endif |
9a568de48 tcp: switch TCP T... |
3033 |
skb->skb_mstamp = tcp_clock_us(); |
80f03e27a tcp: md5: fix rcu... |
3034 3035 3036 |
#ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); |
fd3a154a0 tcp: md5: get rid... |
3037 |
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); |
80f03e27a tcp: md5: fix rcu... |
3038 |
#endif |
58d607d3e tcp: provide skb-... |
3039 |
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); |
37bfbdda0 tcp: remove tcp_s... |
3040 3041 |
tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + sizeof(*th); |
cfb6eeb4c [TCP]: MD5 Signat... |
3042 |
|
aa8223c7b [SK_BUFF]: Introd... |
3043 3044 |
skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); |
1da177e4c Linux-2.6.12-rc2 |
3045 |
|
ea1627c20 tcp: minor optimi... |
3046 |
th = (struct tcphdr *)skb->data; |
1da177e4c Linux-2.6.12-rc2 |
3047 3048 3049 |
memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; |
6ac705b18 tcp: remove tcp_e... |
3050 |
tcp_ecn_make_synack(req, th); |
b44084c2c inet: rename ir_l... |
3051 |
th->source = htons(ireq->ir_num); |
634fb979e inet: includes a ... |
3052 |
th->dest = ireq->ir_rmt_port; |
e05a90ec9 net: reflect mark... |
3053 |
skb->mark = ireq->ir_mark; |
3b1177503 tcp: do not mangl... |
3054 3055 |
skb->ip_summed = CHECKSUM_PARTIAL; th->seq = htonl(tcp_rsk(req)->snt_isn); |
8336886f7 tcp: TCP Fast Ope... |
3056 3057 |
/* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); |
1da177e4c Linux-2.6.12-rc2 |
3058 3059 |
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ |
ed53d0ab7 net: shrink struc... |
3060 |
th->window = htons(min(req->rsk_rcv_wnd, 65535U)); |
5d062de7f tcp: constify tcp... |
3061 |
tcp_options_write((__be32 *)(th + 1), NULL, &opts); |
1da177e4c Linux-2.6.12-rc2 |
3062 |
th->doff = (tcp_header_size >> 2); |
90bbcc608 net: tcp: rename ... |
3063 |
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); |
cfb6eeb4c [TCP]: MD5 Signat... |
3064 3065 3066 |
#ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ |
80f03e27a tcp: md5: fix rcu... |
3067 |
if (md5) |
bd0388ae7 TCPCT part 1f: In... |
3068 |
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, |
39f8e58e5 tcp: md5: remove ... |
3069 |
md5, req_to_sk(req), skb); |
80f03e27a tcp: md5: fix rcu... |
3070 |
rcu_read_unlock(); |
cfb6eeb4c [TCP]: MD5 Signat... |
3071 |
#endif |
b50edd781 tcp: tcp_make_syn... |
3072 |
/* Do not fool tcpdump (if any), clean our debris */ |
2456e8553 ktime: Get rid of... |
3073 |
skb->tstamp = 0; |
1da177e4c Linux-2.6.12-rc2 |
3074 3075 |
return skb; } |
4bc2f18ba net/ipv4: EXPORT_... |
3076 |
EXPORT_SYMBOL(tcp_make_synack); |
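Per the RFC 1323 note above, the window field in a SYN or SYN-ACK is never scaled, so the advertised receive window is clamped to the 16-bit maximum. A one-line sketch of that clamp:

#include <stdio.h>

/* Window field in SYN/SYN-ACK: no scaling applies, clamp to 16 bits. */
static unsigned short synack_window(unsigned int rsk_rcv_wnd)
{
	return rsk_rcv_wnd < 65535U ? (unsigned short)rsk_rcv_wnd : 65535;
}

int main(void)
{
	printf("%u\n", synack_window(28960));  /* fits: 28960 */
	printf("%u\n", synack_window(262144)); /* clamped: 65535 */
	return 0;
}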
1da177e4c Linux-2.6.12-rc2 |
3077 |
|
81164413a net: tcp: add per... |
3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 |
static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); if (ca_key == TCP_CA_UNSPEC) return; rcu_read_lock(); ca = tcp_ca_find_key(ca_key); if (likely(ca && try_module_get(ca->owner))) { module_put(icsk->icsk_ca_ops->owner); icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); icsk->icsk_ca_ops = ca; } rcu_read_unlock(); } |
67edfef78 TCP: Add comments... |
3096 |
/* Do all connect socket setups that can be done AF independent. */ |
f7e56a76a tcp: make local f... |
3097 |
static void tcp_connect_init(struct sock *sk) |
1da177e4c Linux-2.6.12-rc2 |
3098 |
{ |
cf533ea53 tcp: add const qu... |
3099 |
const struct dst_entry *dst = __sk_dst_get(sk); |
1da177e4c Linux-2.6.12-rc2 |
3100 3101 |
struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; |
13d3b1ebe bpf: Support for ... |
3102 |
u32 rcv_wnd; |
1da177e4c Linux-2.6.12-rc2 |
3103 3104 3105 3106 |
/* We'll fix this up when we get a response from the other end. * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ |
5d2ed0521 tcp: Namespaceify... |
3107 3108 3109 |
tp->tcp_header_len = sizeof(struct tcphdr); if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; |
1da177e4c Linux-2.6.12-rc2 |
3110 |
|
cfb6eeb4c [TCP]: MD5 Signat... |
3111 |
#ifdef CONFIG_TCP_MD5SIG |
00db41243 ipv4: coding styl... |
3112 |
if (tp->af_specific->md5_lookup(sk, sk)) |
cfb6eeb4c [TCP]: MD5 Signat... |
3113 3114 |
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif |
1da177e4c Linux-2.6.12-rc2 |
3115 3116 3117 3118 |
/* If the user gave us a TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; tp->max_window = 0; |
5d424d5a6 [TCP]: MTU probing |
3119 |
tcp_mtup_init(sk); |
1da177e4c Linux-2.6.12-rc2 |
3120 |
tcp_sync_mss(sk, dst_mtu(dst)); |
81164413a net: tcp: add per... |
3121 |
tcp_ca_dst_init(sk, dst); |
1da177e4c Linux-2.6.12-rc2 |
3122 3123 |
if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); |
3541f9e8b tcp: add tcp_mss_... |
3124 |
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); |
f5fff5dc8 tcp: advertise MS... |
3125 |
|
1da177e4c Linux-2.6.12-rc2 |
3126 |
tcp_initialize_rcv_mss(sk); |
1da177e4c Linux-2.6.12-rc2 |
3127 |
|
e88c64f0a tcp: allow effect... |
3128 3129 3130 3131 |
/* limit the window selection if the user enforces a smaller rx buffer */ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) tp->window_clamp = tcp_full_space(sk); |
13d3b1ebe bpf: Support for ... |
3132 3133 3134 |
rcv_wnd = tcp_rwnd_init_bpf(sk); if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); |
1da177e4c Linux-2.6.12-rc2 |
3135 3136 3137 3138 |
tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, |
9bb37ef00 tcp: Namespaceify... |
3139 |
sock_net(sk)->ipv4.sysctl_tcp_window_scaling, |
31d12926e net: Add rtnetlin... |
3140 |
&rcv_wscale, |
13d3b1ebe bpf: Support for ... |
3141 |
rcv_wnd); |
1da177e4c Linux-2.6.12-rc2 |
3142 3143 3144 3145 3146 3147 3148 |
tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; sk->sk_err = 0; sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; |
ee7537b63 tcp: tcp_init_wl ... |
3149 |
tcp_init_wl(tp, 0); |
c89d53430 tcp: purge write ... |
3150 |
tcp_write_queue_purge(sk); |
1da177e4c Linux-2.6.12-rc2 |
3151 3152 |
tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; |
33f5f57ee tcp: kill pointle... |
3153 |
tp->snd_up = tp->write_seq; |
370816aef tcp: Move code ar... |
3154 |
tp->snd_nxt = tp->write_seq; |
ee9952831 tcp: Initial repa... |
3155 3156 3157 |
if (likely(!tp->repair)) tp->rcv_nxt = 0; |
c7781a6e3 tcp: initialize r... |
3158 |
else |
70eabf0e1 tcp: use tcp_jiff... |
3159 |
tp->rcv_tstamp = tcp_jiffies32; |
ee9952831 tcp: Initial repa... |
3160 3161 |
tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; |
1da177e4c Linux-2.6.12-rc2 |
3162 |
|
8550f328f bpf: Support for ... |
3163 |
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); |
463c84b97 [NET]: Introduce ... |
3164 |
inet_csk(sk)->icsk_retransmits = 0; |
1da177e4c Linux-2.6.12-rc2 |
3165 3166 |
tcp_clear_retrans(tp); } |
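In the tcp_select_initial_window() call above, the MSS used for window sizing is reduced by the option bytes already committed beyond the bare TCP header, but only when a cached peer timestamp exists (ts_recent_stamp); with timestamps negotiated that is the 12-byte aligned option. A sketch of the adjustment (names illustrative):

#include <stdio.h>

#define TCPOLEN_TSTAMP_ALIGNED 12 /* aligned timestamp option size */

/* MSS handed to initial window selection: subtract the option
 * overhead reserved in tcp_header_len beyond the bare TCP header. */
static int window_sizing_mss(int advmss, int ts_recent_stamp)
{
	return ts_recent_stamp ? advmss - TCPOLEN_TSTAMP_ALIGNED : advmss;
}

int main(void)
{
	printf("%d\n", window_sizing_mss(1460, 1)); /* 1448 */
	printf("%d\n", window_sizing_mss(1460, 0)); /* 1460 */
	return 0;
}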
783237e8d net-tcp: Fast Ope... |
3167 3168 3169 3170 3171 3172 |
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); tcb->end_seq += skb->len; |
f4a775d14 net: introduce __... |
3173 |
__skb_header_release(skb); |
783237e8d net-tcp: Fast Ope... |
3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 |
__tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); tp->write_seq = tcb->end_seq; tp->packets_out += tcp_skb_pcount(skb); } /* Build and send a SYN with data and (cached) Fast Open cookie. However, * queue a data-only packet after the regular SYN, such that regular SYNs * are retransmitted on timeouts. Also, if the remote SYN-ACK acknowledges * only the SYN sequence, the data are retransmitted in the first ACK. * If the cookie is not cached or another error occurs, fall back to sending a * regular SYN with the Fast Open cookie request option. */ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; |
065263f40 net/tcp-fastopen:... |
3192 |
int space, err = 0; |
355a901e6 tcp: make connect... |
3193 |
struct sk_buff *syn_data; |
aab487435 net-tcp: Fast Ope... |
3194 |
|
67da22d23 net-tcp: Fast Ope... |
3195 |
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ |
065263f40 net/tcp-fastopen:... |
3196 |
if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) |
783237e8d net-tcp: Fast Ope... |
3197 3198 3199 3200 3201 3202 |
goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and * user-MSS. Reserve maximum option space for middleboxes that add * private TCP options. The cost is reduced data space in SYN :( */ |
3541f9e8b tcp: add tcp_mss_... |
3203 |
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp); |
1b63edd6e tcp: fix SYN-data... |
3204 |
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - |
783237e8d net-tcp: Fast Ope... |
3205 |
MAX_TCP_OPTION_SPACE; |
f5ddcbbb4 net-tcp: fastopen... |
3206 3207 3208 3209 |
space = min_t(size_t, space, fo->size); /* limit to order-0 allocations */ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); |
eb9344781 tcp: add a force_... |
3210 |
syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false); |
355a901e6 tcp: make connect... |
3211 |
if (!syn_data) |
783237e8d net-tcp: Fast Ope... |
3212 |
goto fallback; |
355a901e6 tcp: make connect... |
3213 3214 |
syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); |
07e100f98 tcp: restore fast... |
3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 |
if (space) { int copied = copy_from_iter(skb_put(syn_data, space), space, &fo->data->msg_iter); if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } if (copied != space) { skb_trim(syn_data, copied); space = copied; } |
57be5bdad ip: convert tcp_s... |
3226 |
} |
355a901e6 tcp: make connect... |
3227 3228 3229 3230 |
/* No more data pending in inet_wait_for_connect() */ if (space == fo->size) fo->data = NULL; fo->copied = space; |
783237e8d net-tcp: Fast Ope... |
3231 |
|
355a901e6 tcp: make connect... |
3232 |
tcp_connect_queue_skb(sk, syn_data); |
0f87230d1 tcp: instrument h... |
3233 3234 |
if (syn_data->len) tcp_chrono_start(sk, TCP_CHRONO_BUSY); |
783237e8d net-tcp: Fast Ope... |
3235 |
|
355a901e6 tcp: make connect... |
3236 |
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); |
783237e8d net-tcp: Fast Ope... |
3237 |
|
355a901e6 tcp: make connect... |
3238 |
syn->skb_mstamp = syn_data->skb_mstamp; |
431a91242 tcp: timestamp SY... |
3239 |
|
355a901e6 tcp: make connect... |
3240 3241 3242 3243 3244 3245 3246 3247 |
/* Now the full SYN+DATA has been cloned and sent (or not); * remove the SYN from the original skb (syn_data) * we keep in the write queue in case of a retransmit, as we * also have the SYN packet (with no data) in the same queue. */ TCP_SKB_CB(syn_data)->seq++; TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; if (!err) { |
67da22d23 net-tcp: Fast Ope... |
3248 |
tp->syn_data = (fo->copied > 0); |
f19c29e3e tcp: snmp stats f... |
3249 |
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); |
783237e8d net-tcp: Fast Ope... |
3250 3251 |
goto done; } |
783237e8d net-tcp: Fast Ope... |
3252 |
|
b5b7db8d6 tcp: fastopen: fi... |
3253 3254 3255 |
/* data was not sent, this is our new send_head */ sk->sk_send_head = syn_data; tp->packets_out -= tcp_skb_pcount(syn_data); |
783237e8d net-tcp: Fast Ope... |
3256 3257 3258 3259 3260 3261 3262 |
fallback: /* Send a regular SYN with Fast Open cookie request option */ if (fo->cookie.len > 0) fo->cookie.len = 0; err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); if (err) tp->syn_fastopen = 0; |
783237e8d net-tcp: Fast Ope... |
3263 3264 3265 3266 |
done: fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ return err; } |
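The space computation above bounds how much user data can ride on a Fast Open SYN: the clamped MSS minus the full 40 bytes of option space, further capped by the data actually pending and by an order-0 skb head. A sketch (the head-room constant is an assumed stand-in for SKB_MAX_HEAD(MAX_TCP_HEADER)):

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE 40
#define SKB_HEAD_CAP 3700 /* assumed stand-in for SKB_MAX_HEAD(MAX_TCP_HEADER) */

/* Payload that fits in the SYN: MSS minus worst-case options,
 * capped by what the caller has pending and by an order-0 head. */
static int syn_data_space(int mss_clamp, int pending)
{
	int space = mss_clamp - MAX_TCP_OPTION_SPACE;

	if (pending < space)
		space = pending;
	if (SKB_HEAD_CAP < space)
		space = SKB_HEAD_CAP;
	return space;
}

int main(void)
{
	printf("%d\n", syn_data_space(1460, 10000)); /* 1420 */
	printf("%d\n", syn_data_space(1460, 100));   /* 100  */
	return 0;
}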
67edfef78 TCP: Add comments... |
3267 |
/* Build a SYN and send it off. */ |
1da177e4c Linux-2.6.12-rc2 |
3268 3269 3270 3271 |
int tcp_connect(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; |
ee5868119 network: tcp_conn... |
3272 |
int err; |
1da177e4c Linux-2.6.12-rc2 |
3273 |
|
9872a4bde bpf: Add TCP conn... |
3274 |
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); |
8ba609247 tcp: fastopen: tc... |
3275 3276 3277 |
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ |
1da177e4c Linux-2.6.12-rc2 |
3278 |
tcp_connect_init(sk); |
2b9164771 ipv6: adapt conne... |
3279 3280 3281 3282 |
if (unlikely(tp->repair)) { tcp_finish_connect(sk, NULL); return 0; } |
eb9344781 tcp: add a force_... |
3283 |
buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); |
355a901e6 tcp: make connect... |
3284 |
if (unlikely(!buff)) |
1da177e4c Linux-2.6.12-rc2 |
3285 |
return -ENOBUFS; |
a3433f35a tcp: unify tcp fl... |
3286 |
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
9a568de48 tcp: switch TCP T... |
3287 3288 |
tcp_mstamp_refresh(tp); tp->retrans_stamp = tcp_time_stamp(tp); |
783237e8d net-tcp: Fast Ope... |
3289 |
tcp_connect_queue_skb(sk, buff); |
735d38311 tcp: change TCP_E... |
3290 |
tcp_ecn_send_syn(sk, buff); |
1da177e4c Linux-2.6.12-rc2 |
3291 |
|
783237e8d net-tcp: Fast Ope... |
3292 3293 3294 |
/* Send off SYN; include data in Fast Open. */ err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); |
ee5868119 network: tcp_conn... |
3295 3296 |
if (err == -ECONNREFUSED) return err; |
bd37a0885 [TCP]: SNMPv2 tcp... |
3297 3298 3299 3300 3301 3302 |
/* We change tp->snd_nxt after the tcp_transmit_skb() call * in order to make this packet get counted in tcpOutSegs. */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; |
b5b7db8d6 tcp: fastopen: fi... |
3303 3304 3305 3306 3307 |
buff = tcp_send_head(sk); if (unlikely(buff)) { tp->snd_nxt = TCP_SKB_CB(buff)->seq; tp->pushed_seq = TCP_SKB_CB(buff)->seq; } |
81cc8a75d mib: add net to T... |
3308 |
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); |
1da177e4c Linux-2.6.12-rc2 |
3309 3310 |
/* Timer for repeating the SYN until an answer. */ |
3f421baa4 [NET]: Just move ... |
3311 3312 |
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
1da177e4c Linux-2.6.12-rc2 |
3313 3314 |
return 0; } |
4bc2f18ba net/ipv4: EXPORT_... |
3315 |
EXPORT_SYMBOL(tcp_connect); |
1da177e4c Linux-2.6.12-rc2 |
3316 3317 3318 3319 3320 3321 3322 |
/* Send out a delayed ack; the caller does the policy checking * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() * for details. */ void tcp_send_delayed_ack(struct sock *sk) { |
463c84b97 [NET]: Introduce ... |
3323 3324 |
struct inet_connection_sock *icsk = inet_csk(sk); int ato = icsk->icsk_ack.ato; |
1da177e4c Linux-2.6.12-rc2 |
3325 3326 3327 |
unsigned long timeout; if (ato > TCP_DELACK_MIN) { |
463c84b97 [NET]: Introduce ... |
3328 |
const struct tcp_sock *tp = tcp_sk(sk); |
056834d9f [TCP]: cleanup tc... |
3329 |
int max_ato = HZ / 2; |
1da177e4c Linux-2.6.12-rc2 |
3330 |
|
056834d9f [TCP]: cleanup tc... |
3331 3332 |
if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) |
1da177e4c Linux-2.6.12-rc2 |
3333 3334 3335 3336 3337 |
max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ /* If some rtt estimate is known, use it to bound delayed ack. |
463c84b97 [NET]: Introduce ... |
3338 |
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements |
1da177e4c Linux-2.6.12-rc2 |
3339 3340 |
* directly. */ |
740b0f184 tcp: switch rtt e... |
3341 3342 3343 |
if (tp->srtt_us) { int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3), TCP_DELACK_MIN); |
1da177e4c Linux-2.6.12-rc2 |
3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 |
if (rtt < max_ato) max_ato = rtt; } ato = min(ato, max_ato); } /* Stay within the limit we were given */ timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ |
463c84b97 [NET]: Introduce ... |
3356 |
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { |
1da177e4c Linux-2.6.12-rc2 |
3357 3358 3359 |
/* If delack timer was blocked or is about to expire, * send ACK now. */ |
463c84b97 [NET]: Introduce ... |
3360 3361 |
if (icsk->icsk_ack.blocked || time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { |
1da177e4c Linux-2.6.12-rc2 |
3362 3363 3364 |
tcp_send_ack(sk); return; } |
463c84b97 [NET]: Introduce ... |
3365 3366 |
if (!time_before(timeout, icsk->icsk_ack.timeout)) timeout = icsk->icsk_ack.timeout; |
1da177e4c Linux-2.6.12-rc2 |
3367 |
} |
463c84b97 [NET]: Introduce ... |
3368 3369 3370 |
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); |
1da177e4c Linux-2.6.12-rc2 |
3371 3372 3373 |
} /* This routine sends an ack and also updates the window. */ |
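The clamp above is easiest to see with concrete numbers. The standalone sketch below restates it in milliseconds, assuming the usual definitions TCP_DELACK_MIN = HZ/25 (40 ms) and TCP_DELACK_MAX = HZ/5 (200 ms); the ICSK_ACK_PUSHED condition is folded into the pingpong flag for brevity, and delack_timeout_ms is an illustrative name, not a kernel helper.

#include <stdio.h>

#define DELACK_MIN_MS	 40	/* TCP_DELACK_MIN: HZ / 25 */
#define DELACK_MAX_MS	200	/* TCP_DELACK_MAX: HZ / 5  */

static int delack_timeout_ms(int ato, int srtt_ms, int pingpong)
{
	if (ato > DELACK_MIN_MS) {
		int max_ato = pingpong ? DELACK_MAX_MS : 500 /* HZ / 2 */;

		/* A measured RTT bounds the delay: never wait much longer
		 * than one round trip, but at least the minimum.
		 */
		if (srtt_ms) {
			int rtt = srtt_ms > DELACK_MIN_MS ?
				  srtt_ms : DELACK_MIN_MS;

			if (rtt < max_ato)
				max_ato = rtt;
		}
		if (ato > max_ato)
			ato = max_ato;
	}
	return ato;
}

int main(void)
{
	/* Interactive flow with a 10 ms RTT: the ACK waits at most 40 ms. */
	printf("%d ms\n", delack_timeout_ms(300, 10, 1));
	return 0;
}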
/* This routine sends an ack and also updates the window. */
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
{
	struct sk_buff *buff;

	/* If we have been reset, we may not send again. */
	if (sk->sk_state == TCP_CLOSE)
		return;

	/* We are not putting this on the write queue, so
	 * tcp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	buff = alloc_skb(MAX_TCP_HEADER,
			 sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
	if (unlikely(!buff)) {
		inet_csk_schedule_ack(sk);
		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
					  TCP_DELACK_MAX, TCP_RTO_MAX);
		return;
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(buff, MAX_TCP_HEADER);
	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);

	/* We do not want pure acks influencing TCP Small Queues or fq/pacing
	 * too much.
	 * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
	 */
	skb_set_tcp_pure_ack(buff);

	/* Send it off; this clears delayed acks for us. */
	__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
}
EXPORT_SYMBOL_GPL(__tcp_send_ack);

void tcp_send_ack(struct sock *sk)
{
	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
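No syscall maps one-to-one onto tcp_send_ack(), but the TCP_QUICKACK socket option flushes pending delayed ACKs immediately, which is the closest userspace lever on this machinery. A runnable sketch; in real use the socket would be connected and the option re-set after each receive.

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int one = 1;

	/* One-shot by design: the kernel may fall back into delayed-ACK
	 * mode later, so interactive applications typically set this
	 * again after every read.
	 */
	if (setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one)) < 0)
		perror("TCP_QUICKACK");
	close(fd);
	return 0;
}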
/* This routine sends a packet with an out of date sequence
 * number. It assumes the other end will try to ack it.
 *
 * Question: what should we do in urgent mode?
 * 4.4BSD forces sending single byte of data. We cannot send
 * out of window data, because we have SND.NXT==SND.MAX...
 *
 * Current solution: send TWO zero-length segments in urgent mode:
 * one with SEG.SEQ=SND.UNA to deliver the urgent pointer, and another
 * out-of-date one with SND.UNA-1 to probe the window.
 */
static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* We don't queue it, tcp_transmit_skb() sets ownership. */
	skb = alloc_skb(MAX_TCP_HEADER,
			sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
	if (!skb)
		return -1;

	/* Reserve space for headers and set control bits. */
	skb_reserve(skb, MAX_TCP_HEADER);
	/* Use a previous sequence.  This should cause the other
	 * end to send an ack.  Don't queue or clone SKB, just
	 * send it.
	 */
	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
	NET_INC_STATS(sock_net(sk), mib);
	return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
}
/* Called from setsockopt( ... TCP_REPAIR ) */
void tcp_send_window_probe(struct sock *sk)
{
	if (sk->sk_state == TCP_ESTABLISHED) {
		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
		tcp_mstamp_refresh(tcp_sk(sk));
		tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
	}
}
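A checkpoint/restore tool reaches this function when it leaves repair mode: setsockopt(TCP_REPAIR, 0) (TCP_REPAIR_OFF) sends one window probe to resynchronize the peer. A sketch of that toggle; CAP_NET_ADMIN is required, and on this fresh, unrestored socket the calls serve only to show the sequence.

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int on = 1, off = 0;

	/* Requires CAP_NET_ADMIN; expect EPERM otherwise. */
	if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) < 0)
		perror("TCP_REPAIR on");

	/* ... restore queues, sequence numbers and window state here ... */

	/* TCP_REPAIR_OFF is the path that calls tcp_send_window_probe(). */
	if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off)) < 0)
		perror("TCP_REPAIR off");
	close(fd);
	return 0;
}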
/* Initiate keepalive or window probe from timer. */
int tcp_write_wakeup(struct sock *sk, int mib)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	if (sk->sk_state == TCP_CLOSE)
		return -1;

	skb = tcp_send_head(sk);
	if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
		int err;
		unsigned int mss = tcp_current_mss(sk);
		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;

		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
			tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;

		/* We are probing the opening of a window whose size is
		 * nonetheless != 0; this must have been the result of
		 * sender-side SWS avoidance.
		 */
		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
		    skb->len > mss) {
			seg_size = min(seg_size, mss);
			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
				return -1;
		} else if (!tcp_skb_pcount(skb))
			tcp_set_skb_tso_segs(skb, mss);

		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
		if (!err)
			tcp_event_new_data_sent(sk, skb);
		return err;
	} else {
		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
			tcp_xmit_probe_skb(sk, 1, mib);
		return tcp_xmit_probe_skb(sk, 0, mib);
	}
}
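One of the timers that lands here is the keepalive timer, which userspace arms with SO_KEEPALIVE and tunes per socket. A runnable sketch; the values are illustrative only (probe after 60 s idle, every 10 s, give up after 5 unanswered probes).

#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int on = 1, idle = 60, intvl = 10, cnt = 5;

	if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0)
		perror("SO_KEEPALIVE");
	/* Probe after 60 s of idle time, then every 10 s, up to 5 times. */
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
	close(fd);
	return 0;
}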
/* A window probe timeout has occurred. If the window is not closed,
 * send a partial packet; otherwise send a zero-window probe.
 */
void tcp_send_probe0(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	unsigned long probe_max;
	int err;

	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);

	if (tp->packets_out || !tcp_send_head(sk)) {
		/* Cancel probe timer, if it is not required. */
		icsk->icsk_probes_out = 0;
		icsk->icsk_backoff = 0;
		return;
	}

	if (err <= 0) {
		if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
			icsk->icsk_backoff++;
		icsk->icsk_probes_out++;
		probe_max = TCP_RTO_MAX;
	} else {
		/* If the packet was not sent due to local congestion,
		 * do not back off and do not remember icsk_probes_out.
		 * Let local senders fight for local resources.
		 *
		 * Still use the accumulated backoff, though.
		 */
		if (!icsk->icsk_probes_out)
			icsk->icsk_probes_out = 1;
		probe_max = TCP_RESOURCE_PROBE_INTERVAL;
	}
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
				  tcp_probe0_when(sk, probe_max),
				  TCP_RTO_MAX);
}
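The interval handed to inet_csk_reset_xmit_timer() comes from tcp_probe0_when(), which shifts the base timeout left by icsk_backoff and clamps it to probe_max (assuming its include/net/tcp.h definition). A standalone model of that arithmetic; probe0_when_ms is an illustrative name, and HZ=1000 is assumed so jiffies read as milliseconds.

#include <stdio.h>

#define RTO_MAX_MS	120000	/* TCP_RTO_MAX (120 * HZ) at HZ = 1000 */

static unsigned long probe0_when_ms(unsigned long base, unsigned int backoff,
				    unsigned long max_when)
{
	/* base << backoff, capped at max_when; 64-bit to avoid overflow. */
	unsigned long long when = (unsigned long long)base << backoff;

	return when < max_when ? (unsigned long)when : max_when;
}

int main(void)
{
	unsigned int i;

	/* 200 ms base RTO: 200, 400, 800, ... until the TCP_RTO_MAX clamp
	 * bites, mirroring successive unanswered window probes.
	 */
	for (i = 0; i < 12; i++)
		printf("probe %2u: %6lu ms\n", i,
		       probe0_when_ms(200, i, RTO_MAX_MS));
	return 0;
}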
int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
{
	const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
	struct flowi fl;
	int res;

	tcp_rsk(req)->txhash = net_tx_rndhash();
	res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
	if (!res) {
		__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
		if (unlikely(tcp_passive_fastopen(sk)))
			tcp_sk(sk)->total_retrans++;
	}
	return res;
}
EXPORT_SYMBOL(tcp_rtx_synack);
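The counters bumped here surface in procfs, RetransSegs in /proc/net/snmp and (assuming the usual MIB naming) TCPSynRetrans in /proc/net/netstat. A deliberately crude way to watch SYNACK retransmits; it just dumps the TcpExt header and value lines rather than parsing columns.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[4096];
	FILE *f = fopen("/proc/net/netstat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Two TcpExt: lines come out, field names first, then values;
	 * TCPSynRetrans is one of the named columns.
	 */
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "TcpExt:", 7))
			fputs(line, stdout);
	fclose(f);
	return 0;
}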