Blame view

net/ipv4/tcp_cong.c 9.44 KB
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
1
2
3
  /*
   * Pluggable TCP congestion control support and newReno
   * congestion control.
02582e9bc   Masanari Iida   treewide: fix typ...
4
   * Based on ideas from I/O scheduler support and Web100.
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
5
6
7
   *
   * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
   */
afd465030   Joe Perches   net: ipv4: Standa...
8
  #define pr_fmt(fmt) "TCP: " fmt
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
9
10
11
12
  #include <linux/module.h>
  #include <linux/mm.h>
  #include <linux/types.h>
  #include <linux/list.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
13
  #include <linux/gfp.h>
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
14
15
16
17
18
19
20
21
22
  #include <net/tcp.h>
  
  /* Taken by every writer in this file around changes to tcp_cong_list. */
  static DEFINE_SPINLOCK(tcp_cong_list_lock);
  /* Registered congestion control algorithms; the first entry is reported
   * as the current default by tcp_get_default_congestion_control().
   */
  static LIST_HEAD(tcp_cong_list);
  
  /* Simple linear search, don't expect many entries! */
  static struct tcp_congestion_ops *tcp_ca_find(const char *name)
  {
  	struct tcp_congestion_ops *e;
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
23
  	list_for_each_entry_rcu(e, &tcp_cong_list, list) {
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
24
25
26
27
28
29
30
31
  		if (strcmp(e->name, name) == 0)
  			return e;
  	}
  
  	return NULL;
  }
  
  /*
d08df601a   Robert P. J. Day   Various typo fixes.
32
   * Attach new congestion control algorithm to the list
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
33
34
35
36
37
38
39
   * of available options.
   */
  int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
  {
  	int ret = 0;
  
  	/* all algorithms must implement ssthresh and cong_avoid ops */
72dc5b922   Stephen Hemminger   [TCP]: Minimum co...
40
  	if (!ca->ssthresh || !ca->cong_avoid) {
afd465030   Joe Perches   net: ipv4: Standa...
41
42
  		pr_err("%s does not implement required ops
  ", ca->name);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
43
44
45
46
47
  		return -EINVAL;
  	}
  
  	spin_lock(&tcp_cong_list_lock);
  	if (tcp_ca_find(ca->name)) {
afd465030   Joe Perches   net: ipv4: Standa...
48
49
  		pr_notice("%s already registered
  ", ca->name);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
50
51
  		ret = -EEXIST;
  	} else {
3d2573f7e   Stephen Hemminger   [TCP]: default co...
52
  		list_add_tail_rcu(&ca->list, &tcp_cong_list);
afd465030   Joe Perches   net: ipv4: Standa...
53
54
  		pr_info("%s registered
  ", ca->name);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  	}
  	spin_unlock(&tcp_cong_list_lock);
  
  	return ret;
  }
  EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
  
  /*
   * Remove congestion control algorithm, called from
   * the module's remove function.  Module ref counts are used
   * to ensure that this can't be done till all sockets using
   * that method are closed.
   *
   * Only unlinks @ca from tcp_cong_list; nothing is freed here.
   * NOTE(review): no RCU grace period is awaited after the unlink, so this
   * relies on the caller / module teardown to outlive concurrent list
   * walkers -- confirm.
   */
  void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
  {
  	/* writers serialize on the list lock; readers walk the list under RCU */
  	spin_lock(&tcp_cong_list_lock);
  	list_del_rcu(&ca->list);
  	spin_unlock(&tcp_cong_list_lock);
  }
  EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
  
  /* Assign choice of congestion control. */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
77
  void tcp_init_congestion_control(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
78
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
79
  	struct inet_connection_sock *icsk = inet_csk(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
80
  	struct tcp_congestion_ops *ca;
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
81
82
83
84
85
86
87
88
  	/* if no choice made yet assign the current value set as default */
  	if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
  		rcu_read_lock();
  		list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  			if (try_module_get(ca->owner)) {
  				icsk->icsk_ca_ops = ca;
  				break;
  			}
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
89

4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
90
  			/* fallback to next available */
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
91
  		}
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
92
  		rcu_read_unlock();
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
93
  	}
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
94

6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
95
96
  	if (icsk->icsk_ca_ops->init)
  		icsk->icsk_ca_ops->init(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
97
98
99
  }
  
  /* Manage refcounts on socket close. */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
100
  void tcp_cleanup_congestion_control(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
101
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
102
103
104
105
106
  	struct inet_connection_sock *icsk = inet_csk(sk);
  
  	if (icsk->icsk_ca_ops->release)
  		icsk->icsk_ca_ops->release(sk);
  	module_put(icsk->icsk_ca_ops->owner);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
107
108
109
110
111
112
113
114
115
116
  }
  
  /* Used by sysctl to change default congestion control */
  int tcp_set_default_congestion_control(const char *name)
  {
  	struct tcp_congestion_ops *ca;
  	int ret = -ENOENT;
  
  	spin_lock(&tcp_cong_list_lock);
  	ca = tcp_ca_find(name);
95a5afca4   Johannes Berg   net: Remove CONFI...
117
  #ifdef CONFIG_MODULES
a8f80e8ff   Eric Paris   Networking: use C...
118
  	if (!ca && capable(CAP_NET_ADMIN)) {
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
119
120
121
122
123
124
125
126
127
  		spin_unlock(&tcp_cong_list_lock);
  
  		request_module("tcp_%s", name);
  		spin_lock(&tcp_cong_list_lock);
  		ca = tcp_ca_find(name);
  	}
  #endif
  
  	if (ca) {
164891aad   Stephen Hemminger   [TCP]: Congestion...
128
  		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
129
130
131
132
133
134
135
  		list_move(&ca->list, &tcp_cong_list);
  		ret = 0;
  	}
  	spin_unlock(&tcp_cong_list_lock);
  
  	return ret;
  }
b1736a714   Stephen Hemminger   [TCP]: Set defaul...
136
137
138
139
140
141
  /* Boot-time hook: install the algorithm named by CONFIG_DEFAULT_TCP_CONG
   * as the default and hand back the setter's result.
   */
  static int __init tcp_congestion_default(void)
  {
  	int err = tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);

  	return err;
  }
  late_initcall(tcp_congestion_default);
3ff825b28   Stephen Hemminger   [TCP]: Add tcp_av...
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
  /* Build string with list of available congestion control values.
   *
   * @buf:    destination buffer
   * @maxlen: size of @buf in bytes
   *
   * Names are written space-separated.  @buf is always NUL-terminated
   * when @maxlen is non-zero; output that does not fit is truncated.
   */
  void tcp_get_available_congestion_control(char *buf, size_t maxlen)
  {
  	struct tcp_congestion_ops *ca;
  	size_t offs = 0;

  	if (!maxlen)
  		return;

  	/* keep the result well-formed even if the loop writes nothing */
  	*buf = '\0';

  	rcu_read_lock();
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  		offs += snprintf(buf + offs, maxlen - offs,
  				 "%s%s",
  				 offs == 0 ? "" : " ", ca->name);

  		/* snprintf() returns the length it wanted to write, so offs
  		 * can pass maxlen; stop before "maxlen - offs" wraps around
  		 */
  		if (WARN_ON_ONCE(offs >= maxlen))
  			break;
  	}
  	rcu_read_unlock();
  }
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
157
158
159
160
161
162
163
164
165
166
167
168
  /* Copy the name of the current default algorithm, i.e. the first entry
   * of tcp_cong_list, into @name (at most TCP_CA_NAME_MAX bytes).
   */
  void tcp_get_default_congestion_control(char *name)
  {
  	struct tcp_congestion_ops *head;

  	/* We will always have reno... */
  	BUG_ON(list_empty(&tcp_cong_list));

  	rcu_read_lock();
  	head = list_first_entry(&tcp_cong_list,
  				struct tcp_congestion_ops, list);
  	strncpy(name, head->name, TCP_CA_NAME_MAX);
  	rcu_read_unlock();
  }
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
169
170
171
172
173
174
175
176
177
  /* Build list of non-restricted congestion control values.
   *
   * @buf:    destination buffer
   * @maxlen: size of @buf in bytes
   *
   * Only algorithms flagged TCP_CONG_NON_RESTRICTED are listed,
   * space-separated.  @buf is always NUL-terminated when @maxlen is
   * non-zero; output that does not fit is truncated.
   */
  void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
  {
  	struct tcp_congestion_ops *ca;
  	size_t offs = 0;

  	/* a zero-sized buffer cannot even hold the terminator */
  	if (!maxlen)
  		return;

  	*buf = '\0';
  	rcu_read_lock();
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
  			continue;
  		offs += snprintf(buf + offs, maxlen - offs,
  				 "%s%s",
  				 offs == 0 ? "" : " ", ca->name);

  		/* snprintf() returns the length it wanted to write, so offs
  		 * can pass maxlen; stop before "maxlen - offs" wraps around
  		 */
  		if (WARN_ON_ONCE(offs >= maxlen))
  			break;
  	}
  	rcu_read_unlock();
  }
  
  /* Change list of non-restricted congestion control */
  int tcp_set_allowed_congestion_control(char *val)
  {
  	struct tcp_congestion_ops *ca;
c34186ed0   Julia Lawall   net/ipv4: Elimina...
192
  	char *saved_clone, *clone, *name;
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
193
  	int ret = 0;
c34186ed0   Julia Lawall   net/ipv4: Elimina...
194
  	saved_clone = clone = kstrdup(val, GFP_USER);
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
195
196
197
198
199
200
201
202
203
204
205
206
  	if (!clone)
  		return -ENOMEM;
  
  	spin_lock(&tcp_cong_list_lock);
  	/* pass 1 check for bad entries */
  	while ((name = strsep(&clone, " ")) && *name) {
  		ca = tcp_ca_find(name);
  		if (!ca) {
  			ret = -ENOENT;
  			goto out;
  		}
  	}
164891aad   Stephen Hemminger   [TCP]: Congestion...
207
  	/* pass 2 clear old values */
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
208
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
164891aad   Stephen Hemminger   [TCP]: Congestion...
209
  		ca->flags &= ~TCP_CONG_NON_RESTRICTED;
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
210
211
212
213
214
215
  
  	/* pass 3 mark as allowed */
  	while ((name = strsep(&val, " ")) && *name) {
  		ca = tcp_ca_find(name);
  		WARN_ON(!ca);
  		if (ca)
164891aad   Stephen Hemminger   [TCP]: Congestion...
216
  			ca->flags |= TCP_CONG_NON_RESTRICTED;
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
217
218
219
  	}
  out:
  	spin_unlock(&tcp_cong_list_lock);
c34186ed0   Julia Lawall   net/ipv4: Elimina...
220
  	kfree(saved_clone);
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
221
222
223
  
  	return ret;
  }
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
224
  /* Change congestion control for socket */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
225
  int tcp_set_congestion_control(struct sock *sk, const char *name)
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
226
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
227
  	struct inet_connection_sock *icsk = inet_csk(sk);
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
228
229
230
231
232
  	struct tcp_congestion_ops *ca;
  	int err = 0;
  
  	rcu_read_lock();
  	ca = tcp_ca_find(name);
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
233

35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
234
  	/* no change asking for existing value */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
235
  	if (ca == icsk->icsk_ca_ops)
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
236
  		goto out;
95a5afca4   Johannes Berg   net: Remove CONFI...
237
  #ifdef CONFIG_MODULES
35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
238
  	/* not found attempt to autoload module */
a8f80e8ff   Eric Paris   Networking: use C...
239
  	if (!ca && capable(CAP_NET_ADMIN)) {
35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
240
241
242
243
244
245
  		rcu_read_unlock();
  		request_module("tcp_%s", name);
  		rcu_read_lock();
  		ca = tcp_ca_find(name);
  	}
  #endif
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
246
247
  	if (!ca)
  		err = -ENOENT;
52e804c6d   Eric W. Biederman   net: Allow userns...
248
249
  	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
  		   ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
250
  		err = -EPERM;
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
251
252
253
254
  	else if (!try_module_get(ca->owner))
  		err = -EBUSY;
  
  	else {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
255
256
  		tcp_cleanup_congestion_control(sk);
  		icsk->icsk_ca_ops = ca;
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
257
258
  
  		if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
259
  			icsk->icsk_ca_ops->init(sk);
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
260
261
262
263
264
  	}
   out:
  	rcu_read_unlock();
  	return err;
  }
cea14e0ed   Ilpo Järvinen   [TCP]: Uninline t...
265
266
267
  /* RFC2861 Check whether we are limited by application or congestion window
   * This is the inverse of cwnd check in tcp_tso_should_defer
   */
a2a385d62   Eric Dumazet   tcp: bool convers...
268
  bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
cea14e0ed   Ilpo Järvinen   [TCP]: Uninline t...
269
270
271
272
273
  {
  	const struct tcp_sock *tp = tcp_sk(sk);
  	u32 left;
  
  	if (in_flight >= tp->snd_cwnd)
a2a385d62   Eric Dumazet   tcp: bool convers...
274
  		return true;
cea14e0ed   Ilpo Järvinen   [TCP]: Uninline t...
275

cea14e0ed   Ilpo Järvinen   [TCP]: Uninline t...
276
  	left = tp->snd_cwnd - in_flight;
ce447eb91   John Heffner   tcp: Allow send-l...
277
  	if (sk_can_gso(sk) &&
246eb2af0   John Heffner   tcp: Limit cwnd g...
278
  	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
d10473d4e   Eric Dumazet   tcp: reduce the b...
279
  	    left < tp->xmit_size_goal_segs)
a2a385d62   Eric Dumazet   tcp: bool convers...
280
  		return true;
6b5a5c0db   Neal Cardwell   tcp: do not scale...
281
  	return left <= tcp_max_tso_deferred_mss(tp);
cea14e0ed   Ilpo Järvinen   [TCP]: Uninline t...
282
283
  }
  EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
40efc6fa1   Stephen Hemminger   [TCP]: less inline's
284

9f9843a75   Yuchung Cheng   tcp: properly han...
285
286
287
288
289
290
291
292
  /* Slow start is used when congestion window is no greater than the slow start
   * threshold. We base on RFC2581 and also handle stretch ACKs properly.
   * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
   * something better;) a packet is only considered (s)acked in its entirety to
   * defend the ACK attacks described in the RFC. Slow start processes a stretch
   * ACK of degree N as if N acks of degree 1 are received back to back except
   * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
   * returns the leftover acks to adjust cwnd in congestion avoidance mode.
40efc6fa1   Stephen Hemminger   [TCP]: less inline's
293
   */
9f9843a75   Yuchung Cheng   tcp: properly han...
294
  int tcp_slow_start(struct tcp_sock *tp, u32 acked)
40efc6fa1   Stephen Hemminger   [TCP]: less inline's
295
  {
9f9843a75   Yuchung Cheng   tcp: properly han...
296
  	u32 cwnd = tp->snd_cwnd + acked;
a02ba0416   Stephen Hemminger   [TCP] slow start:...
297

9f9843a75   Yuchung Cheng   tcp: properly han...
298
299
300
301
302
  	if (cwnd > tp->snd_ssthresh)
  		cwnd = tp->snd_ssthresh + 1;
  	acked -= cwnd - tp->snd_cwnd;
  	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
  	return acked;
40efc6fa1   Stephen Hemminger   [TCP]: less inline's
303
304
  }
  EXPORT_SYMBOL_GPL(tcp_slow_start);
758ce5c8d   Ilpo Järvinen   tcp: add helper f...
305
306
307
308
309
310
311
312
313
314
315
316
  /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w).
   *
   * Each call below the threshold @w bumps snd_cwnd_cnt.  Once the counter
   * has reached @w it is reset and snd_cwnd grows by one, unless the window
   * already sits at snd_cwnd_clamp.
   */
  void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
  {
  	if (tp->snd_cwnd_cnt < w) {
  		tp->snd_cwnd_cnt++;
  		return;
  	}

  	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  		tp->snd_cwnd++;
  	tp->snd_cwnd_cnt = 0;
  }
  EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
317
318
319
320
321
322
323
  /*
   * TCP Reno congestion control
   * This is special case used for fallback as well.
   */
  /* This is Jacobson's slow start and congestion avoidance.
   * SIGCOMM '88, p. 328.
   */
9f9843a75   Yuchung Cheng   tcp: properly han...
324
  void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
325
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
326
  	struct tcp_sock *tp = tcp_sk(sk);
f4805eded   Stephen Hemminger   [TCP]: fix conges...
327
  	if (!tcp_is_cwnd_limited(sk, in_flight))
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
328
  		return;
7faffa1c7   Stephen Hemminger   [TCP]: add tcp_sl...
329
  	/* In "safe" area, increase. */
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
330
  	if (tp->snd_cwnd <= tp->snd_ssthresh)
9f9843a75   Yuchung Cheng   tcp: properly han...
331
  		tcp_slow_start(tp, acked);
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
332
  	/* In dangerous area, increase slowly. */
ca2eb5679   Stephen Hemminger   tcp: remove Appro...
333
  	else
758ce5c8d   Ilpo Järvinen   tcp: add helper f...
334
  		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
335
336
337
338
  }
  EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
  
  /* Slow start threshold is half the congestion window (min 2) */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
339
  u32 tcp_reno_ssthresh(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
340
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
341
  	const struct tcp_sock *tp = tcp_sk(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
342
343
344
  	return max(tp->snd_cwnd >> 1U, 2U);
  }
  EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
345
  struct tcp_congestion_ops tcp_reno = {
164891aad   Stephen Hemminger   [TCP]: Congestion...
346
  	.flags		= TCP_CONG_NON_RESTRICTED,
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
347
348
349
350
  	.name		= "reno",
  	.owner		= THIS_MODULE,
  	.ssthresh	= tcp_reno_ssthresh,
  	.cong_avoid	= tcp_reno_cong_avoid,
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
351
  };
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
352
353
354
355
356
357
358
359
360
  /* Placeholder ops used until SYN.
   * Same Reno callbacks under an empty name, so that
   * tcp_init_congestion_control() can recognise a socket for which
   * no algorithm was chosen yet.
   */
  struct tcp_congestion_ops tcp_init_congestion_ops  = {
  	.owner		= THIS_MODULE,
  	.name		= "",
  	.cong_avoid	= tcp_reno_cong_avoid,
  	.ssthresh	= tcp_reno_ssthresh,
  };
  EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);