Blame view

net/ipv4/tcp_cubic.c 15.5 KB
09c434b8a   Thomas Gleixner   treewide: Add SPD...
1
  // SPDX-License-Identifier: GPL-2.0-only
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
2
  /*
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
3
   * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
6b3d62632   Sangtae Ha   [TCP]: TCP cubic ...
4
5
   * Home page:
   *      http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
6
   * This is from the implementation of CUBIC TCP in
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
7
8
9
   * Sangtae Ha, Injong Rhee and Lisong Xu,
   *  "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
   *  in ACM SIGOPS Operating System Review, July 2008.
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
10
   * Available from:
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
11
12
13
14
15
16
17
18
19
20
21
   *  http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
   *
   * CUBIC integrates a new slow start algorithm, called HyStart.
   * The details of HyStart are presented in
   *  Sangtae Ha and Injong Rhee,
   *  "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
   * Available from:
   *  http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
   *
   * All testing results are available from:
   * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
22
23
24
25
   *
   * Unless CUBIC is enabled and congestion window is large
   * this behaves the same as the original Reno.
   */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
26
27
  #include <linux/mm.h>
  #include <linux/module.h>
6f6d6a1a6   Roman Zippel   rename div64_64 t...
28
  #include <linux/math64.h>
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
29
  #include <net/tcp.h>
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
30
31
32
33
  
  /* Scale factor for the beta calculation: max_cwnd = snd_cwnd * beta */
  #define BICTCP_BETA_SCALE	1024

  /* BIC HZ 2^10 = 1024 */
  #define BICTCP_HZ		10

  /* Two methods of hybrid slow start (bit flags for hystart_detect) */
  #define HYSTART_ACK_TRAIN	0x1
  #define HYSTART_DELAY		0x2

  /* Number of delay samples for detecting the increase of delay */
  #define HYSTART_MIN_SAMPLES	8

  /* Lower and upper bound of the delay-increase threshold */
  #define HYSTART_DELAY_MIN	(4000U)	/* 4 ms */
  #define HYSTART_DELAY_MAX	(16000U)	/* 16 ms */

  /* Clamp a candidate threshold into [HYSTART_DELAY_MIN, HYSTART_DELAY_MAX] */
  #define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
59758f445   Stephen Hemminger   [TCP]: Use read m...
44
  /* CUBIC tunables; each one is exposed through module_param() below. */
  static int fast_convergence __read_mostly = 1;
  static int beta __read_mostly = 717;	/* = 717/1024 (BICTCP_BETA_SCALE) */
  static int initial_ssthresh __read_mostly;
  static int bic_scale __read_mostly = 41;
  static int tcp_friendliness __read_mostly = 1;

  /* Hybrid slow start (HyStart) tunables. */
  static int hystart __read_mostly = 1;
  static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
  static int hystart_low_window __read_mostly = 16;
  static int hystart_ack_delta_us __read_mostly = 2000;

  /* Scale factors filled in by cubictcp_register() at module init. */
  static u32 cube_rtt_scale __read_mostly;
  static u32 beta_scale __read_mostly;
  static u64 cube_factor __read_mostly;
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
58
59
  
  /* Note parameters that are used for precomputing scale factors are read-only */

  module_param(fast_convergence, int, 0644);
  MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");

  module_param(beta, int, 0644);
  MODULE_PARM_DESC(beta, "beta for multiplicative increase");

  module_param(initial_ssthresh, int, 0644);
  MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");

  /* 0444: bic_scale feeds the factors precomputed at module init */
  module_param(bic_scale, int, 0444);
  MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");

  module_param(tcp_friendliness, int, 0644);
  MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");

  module_param(hystart, int, 0644);
  MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");

  module_param(hystart_detect, int, 0644);
  MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
  		 " 1: packet-train 2: delay 3: both packet-train and delay");

  module_param(hystart_low_window, int, 0644);
  MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");

  module_param(hystart_ack_delta_us, int, 0644);
  MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)");
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
79

df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
80
81
82
  /* BIC TCP Parameters */
  struct bictcp {
  	u32	cnt;		/* increase cwnd by 1 after ACKs */
688d1945b   stephen hemminger   tcp: whitespace f...
83
  	u32	last_max_cwnd;	/* last maximum snd_cwnd */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
84
85
86
  	u32	last_cwnd;	/* the last snd_cwnd */
  	u32	last_time;	/* time when updated last_cwnd */
  	u32	bic_origin_point;/* origin point of bic function */
688d1945b   stephen hemminger   tcp: whitespace f...
87
88
  	u32	bic_K;		/* time to origin point
  				   from the beginning of the current epoch */
cff04e2da   Eric Dumazet   tcp_cubic: switch...
89
  	u32	delay_min;	/* min delay (usec) */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
90
91
92
  	u32	epoch_start;	/* beginning of an epoch */
  	u32	ack_cnt;	/* number of acks */
  	u32	tcp_cwnd;	/* estimated tcp cwnd */
9cd981dcf   Neal Cardwell   tcp: fix stretch ...
93
  	u16	unused;
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
94
95
96
97
  	u8	sample_cnt;	/* number of samples to decide curr_rtt */
  	u8	found;		/* the exit point is found? */
  	u32	round_start;	/* beginning of each round */
  	u32	end_seq;	/* end_seq of the round */
17a6e9f1a   stephen hemminger   tcp_cubic: fix cl...
98
  	u32	last_ack;	/* last time when the ACK spacing is close */
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
99
  	u32	curr_rtt;	/* the minimum rtt of current round */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
100
101
102
103
104
105
  };
  
  /* Clear the cubic-curve state and the HyStart "found" flag.
   * The per-round HyStart fields (sample_cnt, round_start, end_seq,
   * last_ack, curr_rtt) are not touched here.
   */
  static inline void bictcp_reset(struct bictcp *ca)
  {
  	/* window tracking */
  	ca->last_max_cwnd = ca->last_cwnd = ca->tcp_cwnd = 0;

  	/* shape of the cubic curve */
  	ca->bic_origin_point = ca->bic_K = 0;

  	/* timing */
  	ca->last_time = ca->epoch_start = 0;
  	ca->delay_min = 0;

  	/* ACK accounting */
  	ca->cnt = ca->ack_cnt = 0;

  	ca->found = 0;
  }
cff04e2da   Eric Dumazet   tcp_cubic: switch...
116
  static inline u32 bictcp_clock_us(const struct sock *sk)
17a6e9f1a   stephen hemminger   tcp_cubic: fix cl...
117
  {
cff04e2da   Eric Dumazet   tcp_cubic: switch...
118
  	return tcp_sk(sk)->tcp_mstamp;
17a6e9f1a   stephen hemminger   tcp_cubic: fix cl...
119
  }
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
120
121
122
123
  static inline void bictcp_hystart_reset(struct sock *sk)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
  	struct bictcp *ca = inet_csk_ca(sk);
cff04e2da   Eric Dumazet   tcp_cubic: switch...
124
  	ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
125
  	ca->end_seq = tp->snd_nxt;
35821fc2b   Eric Dumazet   tcp_cubic: remove...
126
  	ca->curr_rtt = ~0U;
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
127
  	ca->sample_cnt = 0;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
128
129
130
131
  }
  
  static void bictcp_init(struct sock *sk)
  {
5a45f0086   Neal Cardwell   tcp: fix undo aft...
132
133
134
  	struct bictcp *ca = inet_csk_ca(sk);
  
  	bictcp_reset(ca);
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
135
136
137
138
139
  
  	if (hystart)
  		bictcp_hystart_reset(sk);
  
  	if (!hystart && initial_ssthresh)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
140
141
  		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
  }
30927520d   Eric Dumazet   tcp_cubic: better...
142
143
144
  static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
  {
  	if (event == CA_EVENT_TX_START) {
30927520d   Eric Dumazet   tcp_cubic: better...
145
  		struct bictcp *ca = inet_csk_ca(sk);
d635fbe27   Eric Dumazet   tcp: use tcp_jiff...
146
  		u32 now = tcp_jiffies32;
c2e7204d1   Eric Dumazet   tcp_cubic: do not...
147
148
149
  		s32 delta;
  
  		delta = now - tcp_sk(sk)->lsndtime;
30927520d   Eric Dumazet   tcp_cubic: better...
150
151
152
153
  
  		/* We were application limited (idle) for a while.
  		 * Shift epoch_start to keep cwnd growth to cubic curve.
  		 */
c2e7204d1   Eric Dumazet   tcp_cubic: do not...
154
  		if (ca->epoch_start && delta > 0) {
30927520d   Eric Dumazet   tcp_cubic: better...
155
  			ca->epoch_start += delta;
c2e7204d1   Eric Dumazet   tcp_cubic: do not...
156
157
158
  			if (after(ca->epoch_start, now))
  				ca->epoch_start = now;
  		}
30927520d   Eric Dumazet   tcp_cubic: better...
159
160
161
  		return;
  	}
  }
7e58886b4   Stephen Hemminger   [TCP]: cubic opti...
162
163
164
  /* calculate the cubic root of x using a table lookup followed by one
   * Newton-Raphson iteration.
   * Avg err ~= 0.195%
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
165
   */
9eb2d6271   Stephen Hemminger   [TCP] cubic: use ...
166
  static u32 cubic_root(u64 a)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
167
  {
7e58886b4   Stephen Hemminger   [TCP]: cubic opti...
168
169
170
171
172
173
174
175
  	u32 x, b, shift;
  	/*
  	 * cbrt(x) MSB values for x MSB values in [0..63].
  	 * Precomputed then refined by hand - Willy Tarreau
  	 *
  	 * For x in [0..63],
  	 *   v = cbrt(x << 18) - 1
  	 *   cbrt(x) = (v[x] + 10) >> 6
9eb2d6271   Stephen Hemminger   [TCP] cubic: use ...
176
  	 */
7e58886b4   Stephen Hemminger   [TCP]: cubic opti...
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
  	static const u8 v[] = {
  		/* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
  		/* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
  		/* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
  		/* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
  		/* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
  		/* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
  		/* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
  		/* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
  	};
  
  	b = fls64(a);
  	if (b < 7) {
  		/* a in [0..63] */
  		return ((u32)v[(u32)a] + 35) >> 6;
  	}
  
  	b = ((b * 84) >> 8) - 1;
  	shift = (a >> (b * 3));
  
  	x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
  
  	/*
  	 * Newton-Raphson iteration
  	 *                         2
  	 * x    = ( 2 * x  +  a / x  ) / 3
  	 *  k+1          k         k
  	 */
6f6d6a1a6   Roman Zippel   rename div64_64 t...
205
  	x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
7e58886b4   Stephen Hemminger   [TCP]: cubic opti...
206
  	x = ((x * 341) >> 10);
9eb2d6271   Stephen Hemminger   [TCP] cubic: use ...
207
  	return x;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
208
  }
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
209
210
211
  /*
   * Compute congestion window to use.
   */
9cd981dcf   Neal Cardwell   tcp: fix stretch ...
212
  static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
213
  {
2ed0edf90   Eric Dumazet   tcp: cubic: fix o...
214
215
  	u32 delta, bic_target, max_cnt;
  	u64 offs, t;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
216

9cd981dcf   Neal Cardwell   tcp: fix stretch ...
217
  	ca->ack_cnt += acked;	/* count the number of ACKed packets */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
218
219
  
  	if (ca->last_cwnd == cwnd &&
ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
220
  	    (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
221
  		return;
d6b1a8a92   Neal Cardwell   tcp: fix timing i...
222
223
224
225
  	/* The CUBIC function can update ca->cnt at most once per jiffy.
  	 * On all cwnd reduction events, ca->epoch_start is set to 0,
  	 * which will force a recalculation of ca->cnt.
  	 */
ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
226
  	if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
d6b1a8a92   Neal Cardwell   tcp: fix timing i...
227
  		goto tcp_friendliness;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
228
  	ca->last_cwnd = cwnd;
ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
229
  	ca->last_time = tcp_jiffies32;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
230

df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
231
  	if (ca->epoch_start == 0) {
ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
232
  		ca->epoch_start = tcp_jiffies32;	/* record beginning */
9cd981dcf   Neal Cardwell   tcp: fix stretch ...
233
  		ca->ack_cnt = acked;			/* start counting */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
234
235
236
237
238
239
  		ca->tcp_cwnd = cwnd;			/* syn with cubic */
  
  		if (ca->last_max_cwnd <= cwnd) {
  			ca->bic_K = 0;
  			ca->bic_origin_point = cwnd;
  		} else {
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
240
241
242
243
244
  			/* Compute new K based on
  			 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
  			 */
  			ca->bic_K = cubic_root(cube_factor
  					       * (ca->last_max_cwnd - cwnd));
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
245
246
247
  			ca->bic_origin_point = ca->last_max_cwnd;
  		}
  	}
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
248
249
250
  	/* cubic function - calc*/
  	/* calculate c * time^3 / rtt,
  	 *  while considering overflow in calculation of time^3
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
251
  	 * (so time^3 is done by using 64 bit)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
252
  	 * and without the support of division of 64bit numbers
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
253
  	 * (so all divisions are done by using 32 bit)
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
254
255
256
  	 *  also NOTE the unit of those veriables
  	 *	  time  = (t - K) / 2^bictcp_HZ
  	 *	  c = bic_scale >> 10
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
257
258
259
  	 * rtt  = (srtt >> 3) / HZ
  	 * !!! The following code does not have overflow problems,
  	 * if the cwnd < 1 million packets !!!
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
260
  	 */
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
261

ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
262
  	t = (s32)(tcp_jiffies32 - ca->epoch_start);
cff04e2da   Eric Dumazet   tcp_cubic: switch...
263
  	t += usecs_to_jiffies(ca->delay_min);
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
264
  	/* change the unit from HZ to bictcp_HZ */
2ed0edf90   Eric Dumazet   tcp: cubic: fix o...
265
266
  	t <<= BICTCP_HZ;
  	do_div(t, HZ);
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
267

e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
268
  	if (t < ca->bic_K)		/* t - K */
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
269
  		offs = ca->bic_K - t;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
270
271
  	else
  		offs = t - ca->bic_K;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
272

89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
273
274
  	/* c/rtt * (t-K)^3 */
  	delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
688d1945b   stephen hemminger   tcp: whitespace f...
275
  	if (t < ca->bic_K)                            /* below origin*/
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
276
  		bic_target = ca->bic_origin_point - delta;
688d1945b   stephen hemminger   tcp: whitespace f...
277
  	else                                          /* above origin*/
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
278
  		bic_target = ca->bic_origin_point + delta;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
279

e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
280
281
  	/* cubic function - calc bictcp_cnt*/
  	if (bic_target > cwnd) {
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
282
  		ca->cnt = cwnd / (bic_target - cwnd);
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
283
284
285
  	} else {
  		ca->cnt = 100 * cwnd;              /* very small increment*/
  	}
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
286

b5ccd0733   Sangtae Ha   tcp_cubic: fix lo...
287
288
289
290
  	/*
  	 * The initial growth of cubic function may be too conservative
  	 * when the available bandwidth is still unknown.
  	 */
5a45f0086   Neal Cardwell   tcp: fix undo aft...
291
  	if (ca->last_max_cwnd == 0 && ca->cnt > 20)
b5ccd0733   Sangtae Ha   tcp_cubic: fix lo...
292
  		ca->cnt = 20;	/* increase cwnd 5% per RTT */
d6b1a8a92   Neal Cardwell   tcp: fix timing i...
293
  tcp_friendliness:
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
294
295
  	/* TCP Friendly */
  	if (tcp_friendliness) {
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
296
  		u32 scale = beta_scale;
688d1945b   stephen hemminger   tcp: whitespace f...
297

89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
298
  		delta = (cwnd * scale) >> 3;
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
299
300
301
  		while (ca->ack_cnt > delta) {		/* update tcp cwnd */
  			ca->ack_cnt -= delta;
  			ca->tcp_cwnd++;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
302
  		}
688d1945b   stephen hemminger   tcp: whitespace f...
303
  		if (ca->tcp_cwnd > cwnd) {	/* if bic is slower than tcp */
89b3d9aaf   Stephen Hemminger   [TCP] cubic: prec...
304
305
  			delta = ca->tcp_cwnd - cwnd;
  			max_cnt = cwnd / delta;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
306
307
308
  			if (ca->cnt > max_cnt)
  				ca->cnt = max_cnt;
  		}
e905a9eda   YOSHIFUJI Hideaki   [NET] IPV4: Fix w...
309
  	}
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
310

d578e18ce   Neal Cardwell   tcp: restore 1.5x...
311
312
313
314
  	/* The maximum rate of cwnd increase CUBIC allows is 1 packet per
  	 * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
  	 */
  	ca->cnt = max(ca->cnt, 2U);
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
315
  }
249015515   Eric Dumazet   tcp: remove in_fl...
316
  /* .cong_avoid hook: grow cwnd on an incoming ACK.
   *
   * Does nothing unless the flow is cwnd limited.  In slow start the
   * ACKed packets are handed to tcp_slow_start() first; whatever it
   * returns is then fed to the cubic update (presumably the part of
   * @acked not consumed by slow start - confirm against tcp_slow_start()).
   */
  static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
  {
  	struct bictcp *ca = inet_csk_ca(sk);
  	struct tcp_sock *tp = tcp_sk(sk);

  	if (!tcp_is_cwnd_limited(sk))
  		return;

  	if (tcp_in_slow_start(tp)) {
  		/* the ACK moved past the current round: begin a new one */
  		if (hystart && after(ack, ca->end_seq))
  			bictcp_hystart_reset(sk);

  		acked = tcp_slow_start(tp, acked);
  		if (acked == 0)
  			return;
  	}

  	bictcp_update(ca, tp->snd_cwnd, acked);
  	tcp_cong_avoid_ai(tp, ca->cnt, acked);
  }
  
  /* .ssthresh hook: end the current epoch, remember the window maximum
   * and return the new slow start threshold, cwnd * beta / 1024, with a
   * floor of 2.
   */
  static u32 bictcp_recalc_ssthresh(struct sock *sk)
  {
  	const struct tcp_sock *tp = tcp_sk(sk);
  	struct bictcp *ca = inet_csk_ca(sk);
  	const u32 cwnd = tp->snd_cwnd;
  	u32 ssthresh;

  	ca->epoch_start = 0;	/* end of epoch */

  	/* Wmax and fast convergence: if the window did not get back to
  	 * the previous maximum, remember a further reduced maximum.
  	 */
  	if (fast_convergence && cwnd < ca->last_max_cwnd)
  		ca->last_max_cwnd = (cwnd * (BICTCP_BETA_SCALE + beta))
  			/ (2 * BICTCP_BETA_SCALE);
  	else
  		ca->last_max_cwnd = cwnd;

  	ssthresh = (cwnd * beta) / BICTCP_BETA_SCALE;

  	return ssthresh < 2U ? 2U : ssthresh;
  }
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
348
349
  /* .set_state hook: on entering TCP_CA_Loss, start over with fresh
   * CUBIC and HyStart state.  Other states are ignored.
   */
  static void bictcp_state(struct sock *sk, u8 new_state)
  {
  	if (new_state != TCP_CA_Loss)
  		return;

  	bictcp_reset(inet_csk_ca(sk));
  	bictcp_hystart_reset(sk);
  }
f278b99ca   Eric Dumazet   tcp_cubic: refact...
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
  /* Account for TSO/GRO delays.
   * Otherwise short RTT flows could get too small ssthresh, since during
   * slow start we begin with small TSO packets and ca->delay_min would
   * not account for long aggregation delay when TSO packets get bigger.
   * Ideally even with a very small RTT we would like to have at least one
   * TSO packet being sent and received by GRO, and another one in qdisc layer.
   * We apply another 100% factor because @rate is doubled at this point.
   * We cap the cushion to 1ms.
   */
  static u32 hystart_ack_delay(struct sock *sk)
  {
  	unsigned long rate;
  
  	rate = READ_ONCE(sk->sk_pacing_rate);
  	if (!rate)
  		return 0;
  	return min_t(u64, USEC_PER_MSEC,
  		     div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
  }
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
374
375
376
377
  /* Run the enabled HyStart detectors on one delay sample.
   *
   * @sk:    socket in slow start
   * @delay: RTT sample in usec
   *
   * When a detector fires, ca->found is set, the matching SNMP counters
   * are bumped and snd_ssthresh is set to the current snd_cwnd.
   */
  static void hystart_update(struct sock *sk, u32 delay)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
  	struct bictcp *ca = inet_csk_ca(sk);
  	u32 threshold;

  	if (hystart_detect & HYSTART_ACK_TRAIN) {
  		u32 now = bictcp_clock_us(sk);

  		/* first detection parameter - ack-train detection */
  		if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
  			/* ACKs are still closely spaced: the train goes on */
  			ca->last_ack = now;

  			threshold = ca->delay_min + hystart_ack_delay(sk);

  			/* Hystart ack train triggers if we get ack past
  			 * ca->delay_min/2.
  			 * Pacing might have delayed packets up to RTT/2
  			 * during slow start.
  			 */
  			if (sk->sk_pacing_status == SK_PACING_NONE)
  				threshold >>= 1;

  			if ((s32)(now - ca->round_start) > threshold) {
  				ca->found = 1;
  				pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
  					 now - ca->round_start, threshold,
  					 ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
  				NET_INC_STATS(sock_net(sk),
  					      LINUX_MIB_TCPHYSTARTTRAINDETECT);
  				NET_ADD_STATS(sock_net(sk),
  					      LINUX_MIB_TCPHYSTARTTRAINCWND,
  					      tp->snd_cwnd);
  				tp->snd_ssthresh = tp->snd_cwnd;
  			}
  		}
  	}

  	if (hystart_detect & HYSTART_DELAY) {
  		/* obtain the minimum delay of more than sampling packets */
  		if (ca->curr_rtt > delay)
  			ca->curr_rtt = delay;
  		if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
  			/* still collecting samples for this round */
  			ca->sample_cnt++;
  		} else {
  			/* enough samples: has the round's minimum RTT grown
  			 * past delay_min by more than the clamped threshold?
  			 */
  			if (ca->curr_rtt > ca->delay_min +
  			    HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
  				ca->found = 1;
  				NET_INC_STATS(sock_net(sk),
  					      LINUX_MIB_TCPHYSTARTDELAYDETECT);
  				NET_ADD_STATS(sock_net(sk),
  					      LINUX_MIB_TCPHYSTARTDELAYCWND,
  					      tp->snd_cwnd);
  				tp->snd_ssthresh = tp->snd_cwnd;
  			}
  		}
  	}
  }
756ee1729   Lawrence Brakmo   tcp: replace cnt ...
432
  static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
433
  {
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
434
  	const struct tcp_sock *tp = tcp_sk(sk);
e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
435
436
  	struct bictcp *ca = inet_csk_ca(sk);
  	u32 delay;
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
437

e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
438
  	/* Some calls are for duplicates without timetamps */
756ee1729   Lawrence Brakmo   tcp: replace cnt ...
439
  	if (sample->rtt_us < 0)
e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
440
441
442
  		return;
  
  	/* Discard delay samples right after fast recovery */
ac35f5622   Eric Dumazet   tcp: bic, cubic: ...
443
  	if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
444
  		return;
cff04e2da   Eric Dumazet   tcp_cubic: switch...
445
  	delay = sample->rtt_us;
e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
446
447
448
449
  	if (delay == 0)
  		delay = 1;
  
  	/* first time call or link delay decreases */
f278b99ca   Eric Dumazet   tcp_cubic: refact...
450
451
  	if (ca->delay_min == 0 || ca->delay_min > delay)
  		ca->delay_min = delay;
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
452
453
  
  	/* hystart triggers when cwnd is larger than some threshold */
f278b99ca   Eric Dumazet   tcp_cubic: refact...
454
  	if (!ca->found && tcp_in_slow_start(tp) && hystart &&
ae27e98a5   Sangtae Ha   [TCP] CUBIC v2.3
455
456
  	    tp->snd_cwnd >= hystart_low_window)
  		hystart_update(sk, delay);
e7d0c8858   Stephen Hemminger   [TCP]: cubic - el...
457
  }
df3271f33   Stephen Hemminger   [TCP] BIC: CUBIC ...
458

a252bebe2   Stephen Hemminger   tcp: mark tcp_con...
459
  /* Congestion control operations registered under the name "cubic". */
  static struct tcp_congestion_ops cubictcp __read_mostly = {
  	.init		= bictcp_init,
  	.ssthresh	= bictcp_recalc_ssthresh,
  	.cong_avoid	= bictcp_cong_avoid,
  	.set_state	= bictcp_state,
  	.undo_cwnd	= tcp_reno_undo_cwnd,
  	.cwnd_event	= bictcp_cwnd_event,
  	.pkts_acked	= bictcp_acked,
  	.owner		= THIS_MODULE,
  	.name		= "cubic",
  };
  
  /* Module init: validate the parameters used for precomputation, derive
   * the scale factors and register the congestion control.
   *
   * Returns -EINVAL for parameter values the precomputation cannot handle,
   * otherwise the result of tcp_register_congestion_control().
   */
  static int __init cubictcp_register(void)
  {
  	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);

  	/* bic_scale == 0 would divide by zero in do_div() below and
  	 * beta == BICTCP_BETA_SCALE would divide by zero in beta_scale;
  	 * negative values wrap around in the unsigned arithmetic.
  	 */
  	if (bic_scale <= 0 || beta < 0 || beta >= BICTCP_BETA_SCALE)
  		return -EINVAL;

  	/* Precompute a bunch of the scaling factors that are used per-packet
  	 * based on SRTT of 100ms
  	 *
  	 * NOTE(review): beta_scale is only computed here, while the beta
  	 * parameter is declared writable (0644); a later write to beta does
  	 * not refresh beta_scale in the code visible in this file.
  	 */
  	beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3
  		/ (BICTCP_BETA_SCALE - beta);

  	cube_rtt_scale = (bic_scale * 10);	/* 1024*c/rtt */

  	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
  	 *  so K = cubic_root( (wmax-cwnd)*rtt/c )
  	 * the unit of K is bictcp_HZ=2^10, not HZ
  	 *
  	 *  c = bic_scale >> 10
  	 *  rtt = 100ms
  	 *
  	 * the following code has been designed and tested for
  	 * cwnd < 1 million packets
  	 * RTT < 100 seconds
  	 * HZ < 1,000,00  (corresponding to 10 nano-second)
  	 * NOTE(review): the HZ bound above is copied as written; its digit
  	 * grouping and the stated period do not agree - confirm.
  	 */

  	/* 1/c * 2^2*bictcp_HZ * srtt */
  	cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */

  	/* divide by bic_scale and by constant Srtt (100ms) */
  	do_div(cube_factor, bic_scale * 10);

  	return tcp_register_congestion_control(&cubictcp);
  }
  
  /* Module exit: remove "cubic" from the registered congestion controls. */
  static void __exit cubictcp_unregister(void)
  {
  	tcp_unregister_congestion_control(&cubictcp);
  }
  
  /* Module entry and exit points */
  module_init(cubictcp_register);
  module_exit(cubictcp_unregister);

  /* Module metadata */
  MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
  MODULE_LICENSE("GPL");
  MODULE_DESCRIPTION("CUBIC TCP");
  MODULE_VERSION("2.3");