Blame view
net/ipv4/tcp_cong.c
10.4 KB
317a76f9a [TCP]: Add plugga... |
1 2 3 4 5 6 7 |
/*
 * Pluggable TCP congestion control support and newReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */
317a76f9a [TCP]: Add plugga... |
8 9 10 11 |
#include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/list.h> |
5a0e3ad6a include cleanup: ... |
12 |
#include <linux/gfp.h> |
317a76f9a [TCP]: Add plugga... |
13 |
#include <net/tcp.h> |
886236c12 [TCP]: Add RFC374... |
14 |
/*
 * Threshold for limited slow start.  tcp_slow_start() only applies the
 * limit when this is greater than zero; it starts out as zero (file-scope
 * objects are zero-initialised).
 */
int sysctl_tcp_max_ssthresh;
317a76f9a [TCP]: Add plugga... |
15 16 17 18 19 20 21 |
static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); /* Simple linear search, don't expect many entries! */ static struct tcp_congestion_ops *tcp_ca_find(const char *name) { struct tcp_congestion_ops *e; |
5f8ef48d2 [TCP]: Allow choo... |
22 |
list_for_each_entry_rcu(e, &tcp_cong_list, list) { |
317a76f9a [TCP]: Add plugga... |
23 24 25 26 27 28 29 30 |
if (strcmp(e->name, name) == 0) return e; } return NULL; } /* |
d08df601a Various typo fixes. |
31 |
* Attach new congestion control algorithm to the list |
317a76f9a [TCP]: Add plugga... |
32 33 34 35 36 37 38 |
* of available options. */ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ |
72dc5b922 [TCP]: Minimum co... |
39 |
if (!ca->ssthresh || !ca->cong_avoid) { |
317a76f9a [TCP]: Add plugga... |
40 41 42 43 44 45 46 47 48 49 50 51 |
printk(KERN_ERR "TCP %s does not implement required ops ", ca->name); return -EINVAL; } spin_lock(&tcp_cong_list_lock); if (tcp_ca_find(ca->name)) { printk(KERN_NOTICE "TCP %s already registered ", ca->name); ret = -EEXIST; } else { |
3d2573f7e [TCP]: default co... |
52 |
list_add_tail_rcu(&ca->list, &tcp_cong_list); |
317a76f9a [TCP]: Add plugga... |
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
printk(KERN_INFO "TCP %s registered ", ca->name); } spin_unlock(&tcp_cong_list_lock); return ret; } EXPORT_SYMBOL_GPL(tcp_register_congestion_control); /* * Remove congestion control algorithm, called from * the module's remove function. Module ref counts are used * to ensure that this can't be done till all sockets using * that method are closed. */ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) { spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. */ |
6687e988d [ICSK]: Move TCP ... |
77 |
void tcp_init_congestion_control(struct sock *sk) |
317a76f9a [TCP]: Add plugga... |
78 |
{ |
6687e988d [ICSK]: Move TCP ... |
79 |
struct inet_connection_sock *icsk = inet_csk(sk); |
317a76f9a [TCP]: Add plugga... |
80 |
struct tcp_congestion_ops *ca; |
4d4d3d1e8 [TCP]: Congestion... |
81 82 83 84 85 86 87 88 |
/* if no choice made yet assign the current value set as default */ if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) { rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { icsk->icsk_ca_ops = ca; break; } |
5f8ef48d2 [TCP]: Allow choo... |
89 |
|
4d4d3d1e8 [TCP]: Congestion... |
90 |
/* fallback to next available */ |
317a76f9a [TCP]: Add plugga... |
91 |
} |
4d4d3d1e8 [TCP]: Congestion... |
92 |
rcu_read_unlock(); |
317a76f9a [TCP]: Add plugga... |
93 |
} |
317a76f9a [TCP]: Add plugga... |
94 |
|
6687e988d [ICSK]: Move TCP ... |
95 96 |
if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); |
317a76f9a [TCP]: Add plugga... |
97 98 99 |
} /* Manage refcounts on socket close. */ |
6687e988d [ICSK]: Move TCP ... |
100 |
void tcp_cleanup_congestion_control(struct sock *sk) |
317a76f9a [TCP]: Add plugga... |
101 |
{ |
6687e988d [ICSK]: Move TCP ... |
102 103 104 105 106 |
struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ca_ops->release) icsk->icsk_ca_ops->release(sk); module_put(icsk->icsk_ca_ops->owner); |
317a76f9a [TCP]: Add plugga... |
107 108 109 110 111 112 113 114 115 116 |
} /* Used by sysctl to change default congestion control */ int tcp_set_default_congestion_control(const char *name) { struct tcp_congestion_ops *ca; int ret = -ENOENT; spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); |
95a5afca4 net: Remove CONFI... |
117 |
#ifdef CONFIG_MODULES |
a8f80e8ff Networking: use C... |
118 |
if (!ca && capable(CAP_NET_ADMIN)) { |
317a76f9a [TCP]: Add plugga... |
119 120 121 122 123 124 125 126 127 |
spin_unlock(&tcp_cong_list_lock); request_module("tcp_%s", name); spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); } #endif if (ca) { |
164891aad [TCP]: Congestion... |
128 |
ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ |
317a76f9a [TCP]: Add plugga... |
129 130 131 132 133 134 135 |
list_move(&ca->list, &tcp_cong_list); ret = 0; } spin_unlock(&tcp_cong_list_lock); return ret; } |
b1736a714 [TCP]: Set defaul... |
136 137 138 139 140 141 |
/*
 * Set default value from kernel configuration at bootup.
 * Registered as a late initcall; returns whatever
 * tcp_set_default_congestion_control() reports for CONFIG_DEFAULT_TCP_CONG.
 */
static int __init tcp_congestion_default(void)
{
	int err;

	err = tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
	return err;
}
late_initcall(tcp_congestion_default);
3ff825b28 [TCP]: Add tcp_av... |
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
/*
 * Build string with list of available congestion control values.
 *
 * @buf:    destination buffer
 * @maxlen: size of @buf in bytes
 *
 * Writes the names of all registered algorithms into @buf, separated by
 * single spaces.  @buf is always left NUL-terminated when @maxlen is
 * non-zero (an empty string if nothing is registered); output that does
 * not fit is truncated.
 */
void tcp_get_available_congestion_control(char *buf, size_t maxlen)
{
	struct tcp_congestion_ops *ca;
	size_t offs = 0;

	if (maxlen == 0)
		return;

	/* Defined result even when the list turns out to be empty. */
	*buf = '\0';

	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		offs += snprintf(buf + offs, maxlen - offs,
				 "%s%s",
				 offs == 0 ? "" : " ", ca->name);

		/*
		 * snprintf() reports the length it wanted to write, so after a
		 * truncation offs reaches or passes maxlen.  Stop here: going
		 * on would make "maxlen - offs" wrap around.
		 */
		if (offs >= maxlen)
			break;
	}
	rcu_read_unlock();
}
317a76f9a [TCP]: Add plugga... |
157 158 159 160 161 162 163 164 165 166 167 168 |
/*
 * Get current default congestion control.
 *
 * Copies the name of the entry at the head of tcp_cong_list into @name
 * using strncpy() with a limit of TCP_CA_NAME_MAX bytes, so @name must
 * provide at least that much room.
 */
void tcp_get_default_congestion_control(char *name)
{
	struct tcp_congestion_ops *head;

	/* We will always have reno... */
	BUG_ON(list_empty(&tcp_cong_list));

	rcu_read_lock();
	head = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
	strncpy(name, head->name, TCP_CA_NAME_MAX);
	rcu_read_unlock();
}
ce7bc3bf1 [TCP]: Restrict c... |
169 170 171 172 173 174 175 176 177 |
/* Built list of non-restricted congestion control values */ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) { struct tcp_congestion_ops *ca; size_t offs = 0; *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { |
164891aad [TCP]: Congestion... |
178 |
if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) |
ce7bc3bf1 [TCP]: Restrict c... |
179 180 181 182 183 184 185 186 187 188 189 190 191 |
continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ca->name); } rcu_read_unlock(); } /* Change list of non-restricted congestion control */ int tcp_set_allowed_congestion_control(char *val) { struct tcp_congestion_ops *ca; |
c34186ed0 net/ipv4: Elimina... |
192 |
char *saved_clone, *clone, *name; |
ce7bc3bf1 [TCP]: Restrict c... |
193 |
int ret = 0; |
c34186ed0 net/ipv4: Elimina... |
194 |
saved_clone = clone = kstrdup(val, GFP_USER); |
ce7bc3bf1 [TCP]: Restrict c... |
195 196 197 198 199 200 201 202 203 204 205 206 |
if (!clone) return -ENOMEM; spin_lock(&tcp_cong_list_lock); /* pass 1 check for bad entries */ while ((name = strsep(&clone, " ")) && *name) { ca = tcp_ca_find(name); if (!ca) { ret = -ENOENT; goto out; } } |
164891aad [TCP]: Congestion... |
207 |
/* pass 2 clear old values */ |
ce7bc3bf1 [TCP]: Restrict c... |
208 |
list_for_each_entry_rcu(ca, &tcp_cong_list, list) |
164891aad [TCP]: Congestion... |
209 |
ca->flags &= ~TCP_CONG_NON_RESTRICTED; |
ce7bc3bf1 [TCP]: Restrict c... |
210 211 212 213 214 215 |
/* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) |
164891aad [TCP]: Congestion... |
216 |
ca->flags |= TCP_CONG_NON_RESTRICTED; |
ce7bc3bf1 [TCP]: Restrict c... |
217 218 219 |
} out: spin_unlock(&tcp_cong_list_lock); |
c34186ed0 net/ipv4: Elimina... |
220 |
kfree(saved_clone); |
ce7bc3bf1 [TCP]: Restrict c... |
221 222 223 |
return ret; } |
5f8ef48d2 [TCP]: Allow choo... |
224 |
/* Change congestion control for socket */ |
6687e988d [ICSK]: Move TCP ... |
225 |
int tcp_set_congestion_control(struct sock *sk, const char *name) |
5f8ef48d2 [TCP]: Allow choo... |
226 |
{ |
6687e988d [ICSK]: Move TCP ... |
227 |
struct inet_connection_sock *icsk = inet_csk(sk); |
5f8ef48d2 [TCP]: Allow choo... |
228 229 230 231 232 |
struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); |
4d4d3d1e8 [TCP]: Congestion... |
233 |
|
35bfbc940 [TCP]: Allow auto... |
234 |
/* no change asking for existing value */ |
6687e988d [ICSK]: Move TCP ... |
235 |
if (ca == icsk->icsk_ca_ops) |
5f8ef48d2 [TCP]: Allow choo... |
236 |
goto out; |
95a5afca4 net: Remove CONFI... |
237 |
#ifdef CONFIG_MODULES |
35bfbc940 [TCP]: Allow auto... |
238 |
/* not found attempt to autoload module */ |
a8f80e8ff Networking: use C... |
239 |
if (!ca && capable(CAP_NET_ADMIN)) { |
35bfbc940 [TCP]: Allow auto... |
240 241 242 243 244 245 |
rcu_read_unlock(); request_module("tcp_%s", name); rcu_read_lock(); ca = tcp_ca_find(name); } #endif |
5f8ef48d2 [TCP]: Allow choo... |
246 247 |
if (!ca) err = -ENOENT; |
164891aad [TCP]: Congestion... |
248 |
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) |
ce7bc3bf1 [TCP]: Restrict c... |
249 |
err = -EPERM; |
5f8ef48d2 [TCP]: Allow choo... |
250 251 252 253 |
else if (!try_module_get(ca->owner)) err = -EBUSY; else { |
6687e988d [ICSK]: Move TCP ... |
254 255 |
tcp_cleanup_congestion_control(sk); icsk->icsk_ca_ops = ca; |
4d4d3d1e8 [TCP]: Congestion... |
256 257 |
if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) |
6687e988d [ICSK]: Move TCP ... |
258 |
icsk->icsk_ca_ops->init(sk); |
5f8ef48d2 [TCP]: Allow choo... |
259 260 261 262 263 |
} out: rcu_read_unlock(); return err; } |
cea14e0ed [TCP]: Uninline t... |
264 265 266 267 268 269 270 271 272 273 |
/*
 * RFC2861: check whether we are limited by application or congestion window.
 * This is the inverse of cwnd check in tcp_tso_should_defer.
 *
 * Returns non-zero when the sender counts as congestion-window limited
 * for @in_flight packets, zero otherwise.
 */
int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 room;

	/* Window completely used. */
	if (in_flight >= tp->snd_cwnd)
		return 1;

	/* Unused part of the window, in packets. */
	room = tp->snd_cwnd - in_flight;

	if (sk_can_gso(sk)) {
		if (room * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
		    room * tp->mss_cache < sk->sk_gso_max_size)
			return 1;
	}

	return room <= tcp_max_tso_deferred_mss(tp);
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
40efc6fa1 [TCP]: less inline's |
282 283 |
/*
 * Slow start is used when congestion window is less than slow start
 * threshold. This version implements the basic RFC2581 version
 * and optionally supports:
 *	RFC3742 Limited Slow Start	  - growth limited to max_ssthresh
 *	RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
 */
void tcp_slow_start(struct tcp_sock *tp)
{
	int step;	/* increase in packets */

	/*
	 * RFC3465: ABC Slow start
	 * With ABC enabled nothing happens until a full MSS worth of
	 * bytes has been acknowledged.
	 */
	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
		return;

	if (!(sysctl_tcp_max_ssthresh > 0 &&
	      tp->snd_cwnd > sysctl_tcp_max_ssthresh))
		step = tp->snd_cwnd;			/* exponential increase */
	else
		step = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */

	/*
	 * RFC3465: ABC
	 * We MAY increase by 2 if discovered delayed ack
	 */
	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
		step <<= 1;

	tp->bytes_acked = 0;
	tp->snd_cwnd_cnt += step;

	/* Convert accumulated credit into window growth, capped by the clamp. */
	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
		tp->snd_cwnd_cnt -= tp->snd_cwnd;
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	}
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
758ce5c8d tcp: add helper f... |
323 324 325 326 327 328 329 330 331 332 333 334 |
/*
 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w).
 *
 * Each call bumps snd_cwnd_cnt until it has reached @w; the next call then
 * grows snd_cwnd by one (unless it is already at snd_cwnd_clamp) and
 * resets the counter.
 */
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
{
	if (tp->snd_cwnd_cnt < w) {
		tp->snd_cwnd_cnt++;
		return;
	}

	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
		tp->snd_cwnd++;
	tp->snd_cwnd_cnt = 0;
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
317a76f9a [TCP]: Add plugga... |
335 336 337 338 339 340 341 |
/* * TCP Reno congestion control * This is special case used for fallback as well. */ /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ |
c3a05c605 [TCP]: Cong.ctrl ... |
342 |
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Leave the window alone unless it is what limits the sender. */
	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	/* In "safe" area, increase. */
	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		tcp_slow_start(tp);
		return;
	}

	/* In dangerous area, increase slowly. */
	if (!sysctl_tcp_abc) {
		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
		return;
	}

	/*
	 * RFC3465: Appropriate Byte Count
	 * increase once for each full cwnd acked
	 */
	if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
		tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
			tp->snd_cwnd++;
	}
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

/* Slow start threshold is half the congestion window (min 2) */
6687e988d [ICSK]: Move TCP ... |
368 |
u32 tcp_reno_ssthresh(struct sock *sk) |
317a76f9a [TCP]: Add plugga... |
369 |
{ |
6687e988d [ICSK]: Move TCP ... |
370 |
const struct tcp_sock *tp = tcp_sk(sk); |
317a76f9a [TCP]: Add plugga... |
371 372 373 |
return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); |
72dc5b922 [TCP]: Minimum co... |
374 375 |
/* Lower bound on congestion window with halving. */ u32 tcp_reno_min_cwnd(const struct sock *sk) |
317a76f9a [TCP]: Add plugga... |
376 |
{ |
6687e988d [ICSK]: Move TCP ... |
377 |
const struct tcp_sock *tp = tcp_sk(sk); |
317a76f9a [TCP]: Add plugga... |
378 379 380 381 382 |
return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); struct tcp_congestion_ops tcp_reno = { |
164891aad [TCP]: Congestion... |
383 |
.flags = TCP_CONG_NON_RESTRICTED, |
317a76f9a [TCP]: Add plugga... |
384 385 386 387 388 389 |
.name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, }; |
5f8ef48d2 [TCP]: Allow choo... |
390 391 392 393 394 395 396 397 398 399 400 401 |
/* Initial congestion control used (until SYN) * really reno under another name so we can tell difference * during tcp_set_default_congestion_control */ struct tcp_congestion_ops tcp_init_congestion_ops = { .name = "", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, }; EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); |