Blame view

net/ipv4/tcp_cong.c 9.65 KB
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
1
2
3
4
5
6
7
  /*
   * Pluggable TCP congestion control support and newReno
   * congestion control.
   * Based on ideas from I/O scheduler support and Web100.
   *
   * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
   */
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
8
9
10
11
12
  #include <linux/module.h>
  #include <linux/mm.h>
  #include <linux/slab.h>
  #include <linux/types.h>
  #include <linux/list.h>
  #include <linux/rcupdate.h>
  #include <net/tcp.h>
886236c12   John Heffner   [TCP]: Add RFC374...
13
  /*
   * Limited slow start threshold consulted by tcp_slow_start().
   * The limit is only applied when this value is greater than zero.
   */
  int sysctl_tcp_max_ssthresh = 0;
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
14
15
16
17
18
19
20
  /*
   * List of registered congestion control algorithms.
   *
   * Code in this file that modifies the list or an entry's flags takes
   * tcp_cong_list_lock; read-only walkers use rcu_read_lock() together
   * with list_for_each_entry_rcu().  The first entry on the list is the
   * one reported and used as the current default.
   */
  static DEFINE_SPINLOCK(tcp_cong_list_lock);
  static LIST_HEAD(tcp_cong_list);
  
  /* Simple linear search, don't expect many entries! */
  static struct tcp_congestion_ops *tcp_ca_find(const char *name)
  {
  	struct tcp_congestion_ops *e;
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
21
  	list_for_each_entry_rcu(e, &tcp_cong_list, list) {
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
22
23
24
25
26
27
28
29
  		if (strcmp(e->name, name) == 0)
  			return e;
  	}
  
  	return NULL;
  }
  
  /*
d08df601a   Robert P. J. Day   Various typo fixes.
30
   * Attach new congestion control algorithm to the list
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
31
32
33
34
35
36
37
   * of available options.
   */
  int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
  {
  	int ret = 0;
  
  	/* all algorithms must implement ssthresh and cong_avoid ops */
72dc5b922   Stephen Hemminger   [TCP]: Minimum co...
38
  	if (!ca->ssthresh || !ca->cong_avoid) {
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
39
40
41
42
43
44
45
46
47
48
49
50
  		printk(KERN_ERR "TCP %s does not implement required ops
  ",
  		       ca->name);
  		return -EINVAL;
  	}
  
  	spin_lock(&tcp_cong_list_lock);
  	if (tcp_ca_find(ca->name)) {
  		printk(KERN_NOTICE "TCP %s already registered
  ", ca->name);
  		ret = -EEXIST;
  	} else {
3d2573f7e   Stephen Hemminger   [TCP]: default co...
51
  		list_add_tail_rcu(&ca->list, &tcp_cong_list);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
  		printk(KERN_INFO "TCP %s registered
  ", ca->name);
  	}
  	spin_unlock(&tcp_cong_list_lock);
  
  	return ret;
  }
  EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
  
  /*
   * Remove congestion control algorithm, called from
   * the module's remove function.  Module ref counts are used
   * to ensure that this can't be done till all sockets using
   * that method are closed.
   *
   * @ca: the previously registered operations table to take off the list.
   *
   * Must be called from a context that is allowed to sleep, because it
   * waits for an RCU grace period before returning.
   */
  void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
  {
  	spin_lock(&tcp_cong_list_lock);
  	list_del_rcu(&ca->list);
  	spin_unlock(&tcp_cong_list_lock);

  	/*
  	 * Lockless list walkers in this file may still be looking at the
  	 * entry that was just unlinked.  Wait for them to finish before
  	 * returning, so the caller can safely let the memory behind @ca
  	 * go away.
  	 */
  	synchronize_rcu();
  }
  EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
  
  /* Assign choice of congestion control. */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
76
  void tcp_init_congestion_control(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
77
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
78
  	struct inet_connection_sock *icsk = inet_csk(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
79
  	struct tcp_congestion_ops *ca;
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
80
81
82
83
84
85
86
87
  	/* if no choice made yet assign the current value set as default */
  	if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
  		rcu_read_lock();
  		list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  			if (try_module_get(ca->owner)) {
  				icsk->icsk_ca_ops = ca;
  				break;
  			}
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
88

4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
89
  			/* fallback to next available */
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
90
  		}
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
91
  		rcu_read_unlock();
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
92
  	}
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
93

6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
94
95
  	if (icsk->icsk_ca_ops->init)
  		icsk->icsk_ca_ops->init(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
96
97
98
  }
  
  /* Manage refcounts on socket close. */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
99
  void tcp_cleanup_congestion_control(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
100
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
101
102
103
104
105
  	struct inet_connection_sock *icsk = inet_csk(sk);
  
  	if (icsk->icsk_ca_ops->release)
  		icsk->icsk_ca_ops->release(sk);
  	module_put(icsk->icsk_ca_ops->owner);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
106
107
108
109
110
111
112
113
114
115
116
  }
  
  /* Used by sysctl to change default congestion control */
  int tcp_set_default_congestion_control(const char *name)
  {
  	struct tcp_congestion_ops *ca;
  	int ret = -ENOENT;
  
  	spin_lock(&tcp_cong_list_lock);
  	ca = tcp_ca_find(name);
  #ifdef CONFIG_KMOD
35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
117
  	if (!ca && capable(CAP_SYS_MODULE)) {
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
118
119
120
121
122
123
124
125
126
  		spin_unlock(&tcp_cong_list_lock);
  
  		request_module("tcp_%s", name);
  		spin_lock(&tcp_cong_list_lock);
  		ca = tcp_ca_find(name);
  	}
  #endif
  
  	if (ca) {
164891aad   Stephen Hemminger   [TCP]: Congestion...
127
  		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
128
129
130
131
132
133
134
  		list_move(&ca->list, &tcp_cong_list);
  		ret = 0;
  	}
  	spin_unlock(&tcp_cong_list_lock);
  
  	return ret;
  }
b1736a714   Stephen Hemminger   [TCP]: Set defaul...
135
136
137
138
139
140
  /*
   * Set default value from kernel configuration at bootup.
   *
   * Registered through late_initcall().  Passes CONFIG_DEFAULT_TCP_CONG to
   * tcp_set_default_congestion_control() and returns that function's
   * result: 0 on success, -ENOENT if no algorithm of that name is found.
   */
  static int __init tcp_congestion_default(void)
  {
  	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
  }
  late_initcall(tcp_congestion_default);
3ff825b28   Stephen Hemminger   [TCP]: Add tcp_av...
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
  /*
   * Build string with list of available congestion control values.
   *
   * @buf:    destination buffer
   * @maxlen: size of @buf in bytes, including room for the terminator
   *
   * Writes the names of all registered algorithms into @buf, separated
   * by single spaces, in list order.  The result is always NUL-terminated
   * when @maxlen is non-zero; if the names do not all fit, the output is
   * truncated and the remaining entries are skipped.
   */
  void tcp_get_available_congestion_control(char *buf, size_t maxlen)
  {
  	struct tcp_congestion_ops *ca;
  	size_t offs = 0;

  	/* nothing can be stored, not even the terminator */
  	if (maxlen == 0)
  		return;
  	*buf = '\0';

  	rcu_read_lock();
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  		offs += snprintf(buf + offs, maxlen - offs,
  				 "%s%s",
  				 offs == 0 ? "" : " ", ca->name);

  		/*
  		 * snprintf() reports the length it wanted to write, so
  		 * offs can move past the end of the buffer.  Stop here:
  		 * another round would compute a wrapped-around size from
  		 * maxlen - offs and write outside buf.
  		 */
  		if (offs >= maxlen)
  			break;
  	}
  	rcu_read_unlock();
  }
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
156
157
158
159
160
161
162
163
164
165
166
167
  /*
   * Report the name of the current default congestion control.
   *
   * @name: destination buffer; up to TCP_CA_NAME_MAX bytes are written
   *        to it with strncpy().
   *
   * The default is whichever entry sits at the head of the list.
   */
  void tcp_get_default_congestion_control(char *name)
  {
  	struct tcp_congestion_ops *head_ca;

  	/* reno is expected to be registered at all times */
  	BUG_ON(list_empty(&tcp_cong_list));

  	rcu_read_lock();
  	head_ca = list_entry(tcp_cong_list.next,
  			     struct tcp_congestion_ops, list);
  	strncpy(name, head_ca->name, TCP_CA_NAME_MAX);
  	rcu_read_unlock();
  }
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
168
169
170
171
172
173
174
175
176
  /*
   * Build list of non-restricted congestion control values.
   *
   * @buf:    destination buffer
   * @maxlen: size of @buf in bytes, including room for the terminator
   *
   * Writes the names of all registered algorithms that carry the
   * TCP_CONG_NON_RESTRICTED flag into @buf, separated by single spaces,
   * in list order.  The result is always NUL-terminated when @maxlen is
   * non-zero; if the names do not all fit, the output is truncated and
   * the remaining entries are skipped.
   */
  void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
  {
  	struct tcp_congestion_ops *ca;
  	size_t offs = 0;

  	/* nothing can be stored, not even the terminator */
  	if (maxlen == 0)
  		return;
  	*buf = '\0';

  	rcu_read_lock();
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
  		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
  			continue;
  		offs += snprintf(buf + offs, maxlen - offs,
  				 "%s%s",
  				 offs == 0 ? "" : " ", ca->name);

  		/*
  		 * snprintf() reports the length it wanted to write, so
  		 * offs can move past the end of the buffer.  Stop here:
  		 * another round would compute a wrapped-around size from
  		 * maxlen - offs and write outside buf.
  		 */
  		if (offs >= maxlen)
  			break;
  	}
  	rcu_read_unlock();
  }
  
  /* Change list of non-restricted congestion control */
  int tcp_set_allowed_congestion_control(char *val)
  {
  	struct tcp_congestion_ops *ca;
  	char *clone, *name;
  	int ret = 0;
  
  	clone = kstrdup(val, GFP_USER);
  	if (!clone)
  		return -ENOMEM;
  
  	spin_lock(&tcp_cong_list_lock);
  	/* pass 1 check for bad entries */
  	while ((name = strsep(&clone, " ")) && *name) {
  		ca = tcp_ca_find(name);
  		if (!ca) {
  			ret = -ENOENT;
  			goto out;
  		}
  	}
164891aad   Stephen Hemminger   [TCP]: Congestion...
207
  	/* pass 2 clear old values */
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
208
  	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
164891aad   Stephen Hemminger   [TCP]: Congestion...
209
  		ca->flags &= ~TCP_CONG_NON_RESTRICTED;
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
210
211
212
213
214
215
  
  	/* pass 3 mark as allowed */
  	while ((name = strsep(&val, " ")) && *name) {
  		ca = tcp_ca_find(name);
  		WARN_ON(!ca);
  		if (ca)
164891aad   Stephen Hemminger   [TCP]: Congestion...
216
  			ca->flags |= TCP_CONG_NON_RESTRICTED;
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
217
218
219
220
221
222
  	}
  out:
  	spin_unlock(&tcp_cong_list_lock);
  
  	return ret;
  }
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
223
  /* Change congestion control for socket */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
224
  int tcp_set_congestion_control(struct sock *sk, const char *name)
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
225
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
226
  	struct inet_connection_sock *icsk = inet_csk(sk);
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
227
228
229
230
231
  	struct tcp_congestion_ops *ca;
  	int err = 0;
  
  	rcu_read_lock();
  	ca = tcp_ca_find(name);
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
232

35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
233
  	/* no change asking for existing value */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
234
  	if (ca == icsk->icsk_ca_ops)
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
235
  		goto out;
35bfbc940   Stephen Hemminger   [TCP]: Allow auto...
236
237
238
239
240
241
242
243
244
  #ifdef CONFIG_KMOD
  	/* not found attempt to autoload module */
  	if (!ca && capable(CAP_SYS_MODULE)) {
  		rcu_read_unlock();
  		request_module("tcp_%s", name);
  		rcu_read_lock();
  		ca = tcp_ca_find(name);
  	}
  #endif
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
245
246
  	if (!ca)
  		err = -ENOENT;
164891aad   Stephen Hemminger   [TCP]: Congestion...
247
  	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
ce7bc3bf1   Stephen Hemminger   [TCP]: Restrict c...
248
  		err = -EPERM;
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
249
250
251
252
  	else if (!try_module_get(ca->owner))
  		err = -EBUSY;
  
  	else {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
253
254
  		tcp_cleanup_congestion_control(sk);
  		icsk->icsk_ca_ops = ca;
4d4d3d1e8   Stephen Hemminger   [TCP]: Congestion...
255
256
  
  		if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
257
  			icsk->icsk_ca_ops->init(sk);
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
258
259
260
261
262
  	}
   out:
  	rcu_read_unlock();
  	return err;
  }
40efc6fa1   Stephen Hemminger   [TCP]: less inline's
263
264
  
  /*
   * Slow start, used while the congestion window is below the slow start
   * threshold.  Implements the basic RFC2581 behaviour and optionally:
   *	RFC3742 Limited Slow Start        - growth limited to max_ssthresh
   *	RFC3465 Appropriate Byte Counting - growth limited by bytes acknowledged
   *
   * @tp: TCP socket state; snd_cwnd, snd_cwnd_cnt and bytes_acked are
   *      updated in place.
   */
  void tcp_slow_start(struct tcp_sock *tp)
  {
  	int increase;	/* growth credit, in packets */

  	/* RFC3465: ABC Slow start
  	 * With ABC enabled, do nothing until at least one full MSS worth
  	 * of bytes has been acknowledged.
  	 */
  	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
  		return;

  	/* exponential increase, unless limited slow start applies */
  	increase = tp->snd_cwnd;
  	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
  		increase = sysctl_tcp_max_ssthresh >> 1;

  	/* RFC3465: ABC
  	 * The credit MAY be doubled when a delayed ack is detected,
  	 * i.e. when two or more MSS were acknowledged at once.
  	 */
  	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
  		increase <<= 1;
  	tp->bytes_acked = 0;

  	/* convert accumulated credit into window growth, up to the clamp */
  	tp->snd_cwnd_cnt += increase;
  	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
  		tp->snd_cwnd_cnt -= tp->snd_cwnd;
  		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  			tp->snd_cwnd++;
  	}
  }
  EXPORT_SYMBOL_GPL(tcp_slow_start);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
304
305
306
307
308
309
310
  /*
   * TCP Reno congestion control
   * This is special case used for fallback as well.
   */
  /* This is Jacobson's slow start and congestion avoidance.
   * SIGCOMM '88, p. 328.
   */
16751347a   Stephen Hemminger   [TCP]: remove unu...
311
  void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
312
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
313
  	struct tcp_sock *tp = tcp_sk(sk);
f4805eded   Stephen Hemminger   [TCP]: fix conges...
314
  	if (!tcp_is_cwnd_limited(sk, in_flight))
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
315
  		return;
7faffa1c7   Stephen Hemminger   [TCP]: add tcp_sl...
316
  	/* In "safe" area, increase. */
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
317
  	if (tp->snd_cwnd <= tp->snd_ssthresh)
7faffa1c7   Stephen Hemminger   [TCP]: add tcp_sl...
318
  		tcp_slow_start(tp);
9772efb97   Stephen Hemminger   [TCP]: Appropriat...
319

e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
320
  	/* In dangerous area, increase slowly. */
9772efb97   Stephen Hemminger   [TCP]: Appropriat...
321
  	else if (sysctl_tcp_abc) {
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
  		/* RFC3465: Appropriate Byte Count
  		 * increase once for each full cwnd acked
  		 */
  		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
  			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
  			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  				tp->snd_cwnd++;
  		}
  	} else {
  		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
  		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
  			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
  				tp->snd_cwnd++;
  			tp->snd_cwnd_cnt = 0;
  		} else
  			tp->snd_cwnd_cnt++;
  	}
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
339
340
341
342
  }
  EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
  
  /* Slow start threshold is half the congestion window (min 2) */
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
343
  u32 tcp_reno_ssthresh(struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
344
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
345
  	const struct tcp_sock *tp = tcp_sk(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
346
347
348
  	return max(tp->snd_cwnd >> 1U, 2U);
  }
  EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
72dc5b922   Stephen Hemminger   [TCP]: Minimum co...
349
350
  /* Lower bound on congestion window with halving. */
  u32 tcp_reno_min_cwnd(const struct sock *sk)
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
351
  {
6687e988d   Arnaldo Carvalho de Melo   [ICSK]: Move TCP ...
352
  	const struct tcp_sock *tp = tcp_sk(sk);
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
353
354
355
356
357
  	return tp->snd_ssthresh/2;
  }
  EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
  
  struct tcp_congestion_ops tcp_reno = {
164891aad   Stephen Hemminger   [TCP]: Congestion...
358
  	.flags		= TCP_CONG_NON_RESTRICTED,
317a76f9a   Stephen Hemminger   [TCP]: Add plugga...
359
360
361
362
363
364
  	.name		= "reno",
  	.owner		= THIS_MODULE,
  	.ssthresh	= tcp_reno_ssthresh,
  	.cong_avoid	= tcp_reno_cong_avoid,
  	.min_cwnd	= tcp_reno_min_cwnd,
  };
5f8ef48d2   Stephen Hemminger   [TCP]: Allow choo...
365
366
367
368
369
370
371
372
373
374
375
376
  /*
   * Placeholder congestion control that uses the Reno callbacks under an
   * empty name.  Having a separate object lets
   * tcp_init_congestion_control() recognise, by pointer comparison, a
   * socket for which no algorithm has been chosen yet.
   */
  struct tcp_congestion_ops tcp_init_congestion_ops  = {
  	.owner		= THIS_MODULE,
  	.name		= "",
  	.cong_avoid	= tcp_reno_cong_avoid,
  	.ssthresh	= tcp_reno_ssthresh,
  	.min_cwnd	= tcp_reno_min_cwnd,
  };
  EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);