Blame view

net/sched/sch_fq_codel.c 18.9 KB
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
1
2
3
4
5
6
7
8
  /*
   * Fair Queue CoDel discipline
   *
   *	This program is free software; you can redistribute it and/or
   *	modify it under the terms of the GNU General Public License
   *	as published by the Free Software Foundation; either version
   *	2 of the License, or (at your option) any later version.
   *
80ba92fa1   Eric Dumazet   codel: add ce_thr...
9
   *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
   */
  
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/jiffies.h>
  #include <linux/string.h>
  #include <linux/in.h>
  #include <linux/errno.h>
  #include <linux/init.h>
  #include <linux/skbuff.h>
  #include <linux/jhash.h>
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
  #include <net/netlink.h>
  #include <net/pkt_sched.h>
cf1facda2   Jiri Pirko   sched: move tcf_p...
26
  #include <net/pkt_cls.h>
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
27
  #include <net/codel.h>
d068ca2ae   Michal Kazior   codel: split into...
28
29
  #include <net/codel_impl.h>
  #include <net/codel_qdisc.h>
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
  
  /*	Fair Queue CoDel.
   *
   * Principles :
   * Packets are classified (internal classifier or external) on flows.
   * This is a Stochastic model (as we use a hash, several flows
   *			       might be hashed on same slot)
   * Each flow has a CoDel managed queue.
   * Flows are linked onto two (Round Robin) lists,
   * so that new flows have priority on old ones.
   *
   * For a given flow, packets are not reordered (CoDel uses a FIFO)
   * head drops only.
   * ECN capability is on by default.
   * Low memory footprint (64 bytes per flow)
   */
  
  struct fq_codel_flow {
  	struct sk_buff	  *head;
  	struct sk_buff	  *tail;
  	struct list_head  flowchain;
  	int		  deficit;
  	u32		  dropped; /* number of drops (or ECN marks) on this flow */
  	struct codel_vars cvars;
  }; /* please try to keep this structure <= 64 bytes */
  
  struct fq_codel_sched_data {
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
57
  	struct tcf_proto __rcu *filter_list; /* optional external classifier */
6529eaba3   Jiri Pirko   net: sched: intro...
58
  	struct tcf_block *block;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
59
60
61
  	struct fq_codel_flow *flows;	/* Flows table [flows_cnt] */
  	u32		*backlogs;	/* backlog table [flows_cnt] */
  	u32		flows_cnt;	/* number of flows */
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
62
  	u32		quantum;	/* psched_mtu(qdisc_dev(sch)); */
9d18562a2   Eric Dumazet   fq_codel: add bat...
63
  	u32		drop_batch_size;
95b58430a   Eric Dumazet   fq_codel: add mem...
64
  	u32		memory_limit;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
65
66
  	struct codel_params cparams;
  	struct codel_stats cstats;
95b58430a   Eric Dumazet   fq_codel: add mem...
67
68
  	u32		memory_usage;
  	u32		drop_overmemory;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
69
70
71
72
73
74
75
76
  	u32		drop_overlimit;
  	u32		new_flow_count;
  
  	struct list_head new_flows;	/* list of new flows */
  	struct list_head old_flows;	/* list of old flows */
  };
  
  static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
342db2218   Tom Herbert   sched: Call skb_g...
77
  				  struct sk_buff *skb)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
78
  {
264b87fa6   Andrew Collins   fq_codel: Avoid r...
79
  	return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
80
81
82
83
84
85
  }
  
  static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
  				      int *qerr)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
86
  	struct tcf_proto *filter;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
87
88
89
90
91
92
93
  	struct tcf_result res;
  	int result;
  
  	if (TC_H_MAJ(skb->priority) == sch->handle &&
  	    TC_H_MIN(skb->priority) > 0 &&
  	    TC_H_MIN(skb->priority) <= q->flows_cnt)
  		return TC_H_MIN(skb->priority);
69204cf7e   Valdis Kletnieks   net: fix suspicio...
94
  	filter = rcu_dereference_bh(q->filter_list);
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
95
  	if (!filter)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
96
97
98
  		return fq_codel_hash(q, skb) + 1;
  
  	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
87d83093b   Jiri Pirko   net: sched: move ...
99
  	result = tcf_classify(skb, filter, &res, false);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
100
101
102
103
104
  	if (result >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
  		switch (result) {
  		case TC_ACT_STOLEN:
  		case TC_ACT_QUEUED:
e25ea21ff   Jiri Pirko   net: sched: intro...
105
  		case TC_ACT_TRAP:
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
  		case TC_ACT_SHOT:
  			return 0;
  		}
  #endif
  		if (TC_H_MIN(res.classid) <= q->flows_cnt)
  			return TC_H_MIN(res.classid);
  	}
  	return 0;
  }
  
  /* helper functions : might be changed when/if skb use a standard list_head */
  
  /* remove one skb from head of slot queue */
  static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
  {
  	struct sk_buff *skb = flow->head;
  
  	flow->head = skb->next;
  	skb->next = NULL;
  	return skb;
  }
  
  /* add skb to flow queue (tail add) */
  static inline void flow_queue_add(struct fq_codel_flow *flow,
  				  struct sk_buff *skb)
  {
  	if (flow->head == NULL)
  		flow->head = skb;
  	else
  		flow->tail->next = skb;
  	flow->tail = skb;
  	skb->next = NULL;
  }
520ac30f4   Eric Dumazet   net_sched: drop p...
140
141
  static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
  				  struct sk_buff **to_free)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
142
143
144
145
146
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	struct sk_buff *skb;
  	unsigned int maxbacklog = 0, idx = 0, i, len;
  	struct fq_codel_flow *flow;
9d18562a2   Eric Dumazet   fq_codel: add bat...
147
  	unsigned int threshold;
95b58430a   Eric Dumazet   fq_codel: add mem...
148
  	unsigned int mem = 0;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
149

9d18562a2   Eric Dumazet   fq_codel: add bat...
150
  	/* Queue is full! Find the fat flow and drop packet(s) from it.
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
151
152
153
  	 * This might sound expensive, but with 1024 flows, we scan
  	 * 4KB of memory, and we dont need to handle a complex tree
  	 * in fast path (packet queue/enqueue) with many cache misses.
9d18562a2   Eric Dumazet   fq_codel: add bat...
154
155
  	 * In stress mode, we'll try to drop 64 packets from the flow,
  	 * amortizing this linear lookup to one cache line per drop.
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
156
157
158
159
160
161
162
  	 */
  	for (i = 0; i < q->flows_cnt; i++) {
  		if (q->backlogs[i] > maxbacklog) {
  			maxbacklog = q->backlogs[i];
  			idx = i;
  		}
  	}
9d18562a2   Eric Dumazet   fq_codel: add bat...
163
164
165
  
  	/* Our goal is to drop half of this fat flow backlog */
  	threshold = maxbacklog >> 1;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
166
  	flow = &q->flows[idx];
9d18562a2   Eric Dumazet   fq_codel: add bat...
167
168
169
170
171
  	len = 0;
  	i = 0;
  	do {
  		skb = dequeue_head(flow);
  		len += qdisc_pkt_len(skb);
008830bc3   Eric Dumazet   net_sched: fq_cod...
172
  		mem += get_codel_cb(skb)->mem_usage;
520ac30f4   Eric Dumazet   net_sched: drop p...
173
  		__qdisc_drop(skb, to_free);
9d18562a2   Eric Dumazet   fq_codel: add bat...
174
175
176
  	} while (++i < max_packets && len < threshold);
  
  	flow->dropped += i;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
177
  	q->backlogs[idx] -= len;
95b58430a   Eric Dumazet   fq_codel: add mem...
178
  	q->memory_usage -= mem;
9d18562a2   Eric Dumazet   fq_codel: add bat...
179
180
181
  	sch->qstats.drops += i;
  	sch->qstats.backlog -= len;
  	sch->q.qlen -= i;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
182
183
  	return idx;
  }
520ac30f4   Eric Dumazet   net_sched: drop p...
184
185
  static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
  			    struct sk_buff **to_free)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
186
187
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
9d18562a2   Eric Dumazet   fq_codel: add bat...
188
  	unsigned int idx, prev_backlog, prev_qlen;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
189
190
  	struct fq_codel_flow *flow;
  	int uninitialized_var(ret);
80e509db5   Eric Dumazet   fq_codel: fix NET...
191
  	unsigned int pkt_len;
95b58430a   Eric Dumazet   fq_codel: add mem...
192
  	bool memory_limited;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
193
194
195
196
  
  	idx = fq_codel_classify(skb, sch, &ret);
  	if (idx == 0) {
  		if (ret & __NET_XMIT_BYPASS)
25331d6ce   John Fastabend   net: sched: imple...
197
  			qdisc_qstats_drop(sch);
520ac30f4   Eric Dumazet   net_sched: drop p...
198
  		__qdisc_drop(skb, to_free);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
199
200
201
202
203
204
205
206
  		return ret;
  	}
  	idx--;
  
  	codel_set_enqueue_time(skb);
  	flow = &q->flows[idx];
  	flow_queue_add(flow, skb);
  	q->backlogs[idx] += qdisc_pkt_len(skb);
25331d6ce   John Fastabend   net: sched: imple...
207
  	qdisc_qstats_backlog_inc(sch, skb);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
208
209
210
  
  	if (list_empty(&flow->flowchain)) {
  		list_add_tail(&flow->flowchain, &q->new_flows);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
211
212
213
214
  		q->new_flow_count++;
  		flow->deficit = q->quantum;
  		flow->dropped = 0;
  	}
008830bc3   Eric Dumazet   net_sched: fq_cod...
215
216
  	get_codel_cb(skb)->mem_usage = skb->truesize;
  	q->memory_usage += get_codel_cb(skb)->mem_usage;
95b58430a   Eric Dumazet   fq_codel: add mem...
217
218
  	memory_limited = q->memory_usage > q->memory_limit;
  	if (++sch->q.qlen <= sch->limit && !memory_limited)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
219
  		return NET_XMIT_SUCCESS;
2ccccf5fb   WANG Cong   net_sched: update...
220
  	prev_backlog = sch->qstats.backlog;
9d18562a2   Eric Dumazet   fq_codel: add bat...
221
  	prev_qlen = sch->q.qlen;
80e509db5   Eric Dumazet   fq_codel: fix NET...
222
223
  	/* save this packet length as it might be dropped by fq_codel_drop() */
  	pkt_len = qdisc_pkt_len(skb);
9d18562a2   Eric Dumazet   fq_codel: add bat...
224
225
226
227
  	/* fq_codel_drop() is quite expensive, as it performs a linear search
  	 * in q->backlogs[] to find a fat flow.
  	 * So instead of dropping a single packet, drop half of its backlog
  	 * with a 64 packets limit to not add a too big cpu spike here.
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
228
  	 */
520ac30f4   Eric Dumazet   net_sched: drop p...
229
  	ret = fq_codel_drop(sch, q->drop_batch_size, to_free);
9d18562a2   Eric Dumazet   fq_codel: add bat...
230

80e509db5   Eric Dumazet   fq_codel: fix NET...
231
232
233
  	prev_qlen -= sch->q.qlen;
  	prev_backlog -= sch->qstats.backlog;
  	q->drop_overlimit += prev_qlen;
95b58430a   Eric Dumazet   fq_codel: add mem...
234
  	if (memory_limited)
80e509db5   Eric Dumazet   fq_codel: fix NET...
235
  		q->drop_overmemory += prev_qlen;
9d18562a2   Eric Dumazet   fq_codel: add bat...
236

80e509db5   Eric Dumazet   fq_codel: fix NET...
237
238
239
240
241
242
243
244
245
246
247
  	/* As we dropped packet(s), better let upper stack know this.
  	 * If we dropped a packet for this flow, return NET_XMIT_CN,
  	 * but in this case, our parents wont increase their backlogs.
  	 */
  	if (ret == idx) {
  		qdisc_tree_reduce_backlog(sch, prev_qlen - 1,
  					  prev_backlog - pkt_len);
  		return NET_XMIT_CN;
  	}
  	qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog);
  	return NET_XMIT_SUCCESS;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
248
249
250
251
252
253
  }
  
  /* This is the specific function called from codel_dequeue()
   * to dequeue a packet from queue. Note: backlog is handled in
   * codel, we dont need to reduce it here.
   */
79bdc4c86   Michal Kazior   codel: generalize...
254
  static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
255
  {
79bdc4c86   Michal Kazior   codel: generalize...
256
  	struct Qdisc *sch = ctx;
865ec5523   Eric Dumazet   fq_codel: should ...
257
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
258
259
260
261
262
263
  	struct fq_codel_flow *flow;
  	struct sk_buff *skb = NULL;
  
  	flow = container_of(vars, struct fq_codel_flow, cvars);
  	if (flow->head) {
  		skb = dequeue_head(flow);
865ec5523   Eric Dumazet   fq_codel: should ...
264
  		q->backlogs[flow - q->flows] -= qdisc_pkt_len(skb);
008830bc3   Eric Dumazet   net_sched: fq_cod...
265
  		q->memory_usage -= get_codel_cb(skb)->mem_usage;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
266
  		sch->q.qlen--;
79bdc4c86   Michal Kazior   codel: generalize...
267
  		sch->qstats.backlog -= qdisc_pkt_len(skb);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
268
269
270
  	}
  	return skb;
  }
79bdc4c86   Michal Kazior   codel: generalize...
271
272
273
  static void drop_func(struct sk_buff *skb, void *ctx)
  {
  	struct Qdisc *sch = ctx;
520ac30f4   Eric Dumazet   net_sched: drop p...
274
275
  	kfree_skb(skb);
  	qdisc_qstats_drop(sch);
79bdc4c86   Michal Kazior   codel: generalize...
276
  }
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
  static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	struct sk_buff *skb;
  	struct fq_codel_flow *flow;
  	struct list_head *head;
  	u32 prev_drop_count, prev_ecn_mark;
  
  begin:
  	head = &q->new_flows;
  	if (list_empty(head)) {
  		head = &q->old_flows;
  		if (list_empty(head))
  			return NULL;
  	}
  	flow = list_first_entry(head, struct fq_codel_flow, flowchain);
  
  	if (flow->deficit <= 0) {
  		flow->deficit += q->quantum;
  		list_move_tail(&flow->flowchain, &q->old_flows);
  		goto begin;
  	}
  
  	prev_drop_count = q->cstats.drop_count;
  	prev_ecn_mark = q->cstats.ecn_mark;
79bdc4c86   Michal Kazior   codel: generalize...
302
303
304
  	skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
  			    &flow->cvars, &q->cstats, qdisc_pkt_len,
  			    codel_get_enqueue_time, drop_func, dequeue_func);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
  
  	flow->dropped += q->cstats.drop_count - prev_drop_count;
  	flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;
  
  	if (!skb) {
  		/* force a pass through old_flows to prevent starvation */
  		if ((head == &q->new_flows) && !list_empty(&q->old_flows))
  			list_move_tail(&flow->flowchain, &q->old_flows);
  		else
  			list_del_init(&flow->flowchain);
  		goto begin;
  	}
  	qdisc_bstats_update(sch, skb);
  	flow->deficit -= qdisc_pkt_len(skb);
2ccccf5fb   WANG Cong   net_sched: update...
319
  	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
320
321
322
  	 * or HTB crashes. Defer it for next round.
  	 */
  	if (q->cstats.drop_count && sch->q.qlen) {
2ccccf5fb   WANG Cong   net_sched: update...
323
324
  		qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
  					  q->cstats.drop_len);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
325
  		q->cstats.drop_count = 0;
2ccccf5fb   WANG Cong   net_sched: update...
326
  		q->cstats.drop_len = 0;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
327
328
329
  	}
  	return skb;
  }
ece5d4c72   Eric Dumazet   net_sched: fq_cod...
330
331
332
333
334
  static void fq_codel_flow_purge(struct fq_codel_flow *flow)
  {
  	rtnl_kfree_skbs(flow->head, flow->tail);
  	flow->head = NULL;
  }
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
335
336
  static void fq_codel_reset(struct Qdisc *sch)
  {
3d0e0af40   Eric Dumazet   fq_codel: explici...
337
338
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	int i;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
339

3d0e0af40   Eric Dumazet   fq_codel: explici...
340
341
342
343
  	INIT_LIST_HEAD(&q->new_flows);
  	INIT_LIST_HEAD(&q->old_flows);
  	for (i = 0; i < q->flows_cnt; i++) {
  		struct fq_codel_flow *flow = q->flows + i;
ece5d4c72   Eric Dumazet   net_sched: fq_cod...
344
  		fq_codel_flow_purge(flow);
3d0e0af40   Eric Dumazet   fq_codel: explici...
345
346
347
348
349
  		INIT_LIST_HEAD(&flow->flowchain);
  		codel_vars_init(&flow->cvars);
  	}
  	memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
  	sch->q.qlen = 0;
ece5d4c72   Eric Dumazet   net_sched: fq_cod...
350
  	sch->qstats.backlog = 0;
77f577614   Eric Dumazet   fq_codel: fix mem...
351
  	q->memory_usage = 0;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
352
353
354
355
356
357
358
359
360
  }
  
  static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
  	[TCA_FQ_CODEL_TARGET]	= { .type = NLA_U32 },
  	[TCA_FQ_CODEL_LIMIT]	= { .type = NLA_U32 },
  	[TCA_FQ_CODEL_INTERVAL]	= { .type = NLA_U32 },
  	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
  	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
  	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
80ba92fa1   Eric Dumazet   codel: add ce_thr...
361
  	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
9d18562a2   Eric Dumazet   fq_codel: add bat...
362
  	[TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
95b58430a   Eric Dumazet   fq_codel: add mem...
363
  	[TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
364
365
366
367
368
369
370
371
372
373
  };
  
  static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
  	int err;
  
  	if (!opt)
  		return -EINVAL;
fceb6435e   Johannes Berg   netlink: pass ext...
374
375
  	err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy,
  			       NULL);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
  	if (err < 0)
  		return err;
  	if (tb[TCA_FQ_CODEL_FLOWS]) {
  		if (q->flows)
  			return -EINVAL;
  		q->flows_cnt = nla_get_u32(tb[TCA_FQ_CODEL_FLOWS]);
  		if (!q->flows_cnt ||
  		    q->flows_cnt > 65536)
  			return -EINVAL;
  	}
  	sch_tree_lock(sch);
  
  	if (tb[TCA_FQ_CODEL_TARGET]) {
  		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
  
  		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
  	}
80ba92fa1   Eric Dumazet   codel: add ce_thr...
393
394
395
396
397
  	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
  		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
  
  		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
  	}
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
398
399
400
401
402
403
404
405
406
407
408
409
410
411
  	if (tb[TCA_FQ_CODEL_INTERVAL]) {
  		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
  
  		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
  	}
  
  	if (tb[TCA_FQ_CODEL_LIMIT])
  		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
  
  	if (tb[TCA_FQ_CODEL_ECN])
  		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
  
  	if (tb[TCA_FQ_CODEL_QUANTUM])
  		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
9d18562a2   Eric Dumazet   fq_codel: add bat...
412
413
  	if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
  		q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
95b58430a   Eric Dumazet   fq_codel: add mem...
414
415
416
417
418
  	if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
  		q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
  
  	while (sch->q.qlen > sch->limit ||
  	       q->memory_usage > q->memory_limit) {
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
419
  		struct sk_buff *skb = fq_codel_dequeue(sch);
2ccccf5fb   WANG Cong   net_sched: update...
420
  		q->cstats.drop_len += qdisc_pkt_len(skb);
ece5d4c72   Eric Dumazet   net_sched: fq_cod...
421
  		rtnl_kfree_skbs(skb, skb);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
422
423
  		q->cstats.drop_count++;
  	}
2ccccf5fb   WANG Cong   net_sched: update...
424
  	qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
425
  	q->cstats.drop_count = 0;
2ccccf5fb   WANG Cong   net_sched: update...
426
  	q->cstats.drop_len = 0;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
427
428
429
430
  
  	sch_tree_unlock(sch);
  	return 0;
  }
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
431
432
433
  static void fq_codel_destroy(struct Qdisc *sch)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
6529eaba3   Jiri Pirko   net: sched: intro...
434
  	tcf_block_put(q->block);
752ade68c   Michal Hocko   treewide: use kv[...
435
436
  	kvfree(q->backlogs);
  	kvfree(q->flows);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
437
438
439
440
441
442
  }
  
  static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	int i;
6529eaba3   Jiri Pirko   net: sched: intro...
443
  	int err;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
444
445
446
  
  	sch->limit = 10*1024;
  	q->flows_cnt = 1024;
95b58430a   Eric Dumazet   fq_codel: add mem...
447
  	q->memory_limit = 32 << 20; /* 32 MBytes */
9d18562a2   Eric Dumazet   fq_codel: add bat...
448
  	q->drop_batch_size = 64;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
449
  	q->quantum = psched_mtu(qdisc_dev(sch));
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
450
451
  	INIT_LIST_HEAD(&q->new_flows);
  	INIT_LIST_HEAD(&q->old_flows);
79bdc4c86   Michal Kazior   codel: generalize...
452
  	codel_params_init(&q->cparams);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
453
454
  	codel_stats_init(&q->cstats);
  	q->cparams.ecn = true;
79bdc4c86   Michal Kazior   codel: generalize...
455
  	q->cparams.mtu = psched_mtu(qdisc_dev(sch));
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
456
457
458
459
460
461
  
  	if (opt) {
  		int err = fq_codel_change(sch, opt);
  		if (err)
  			return err;
  	}
6529eaba3   Jiri Pirko   net: sched: intro...
462
463
464
  	err = tcf_block_get(&q->block, &q->filter_list);
  	if (err)
  		return err;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
465
  	if (!q->flows) {
752ade68c   Michal Hocko   treewide: use kv[...
466
467
  		q->flows = kvzalloc(q->flows_cnt *
  					   sizeof(struct fq_codel_flow), GFP_KERNEL);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
468
469
  		if (!q->flows)
  			return -ENOMEM;
752ade68c   Michal Hocko   treewide: use kv[...
470
  		q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
30c31d746   Nikolay Aleksandrov   sch_fq_codel: avo...
471
  		if (!q->backlogs)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
472
  			return -ENOMEM;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
473
474
475
476
  		for (i = 0; i < q->flows_cnt; i++) {
  			struct fq_codel_flow *flow = q->flows + i;
  
  			INIT_LIST_HEAD(&flow->flowchain);
b379135c4   Eric Dumazet   fq_codel: dont re...
477
  			codel_vars_init(&flow->cvars);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
  		}
  	}
  	if (sch->limit >= 1)
  		sch->flags |= TCQ_F_CAN_BYPASS;
  	else
  		sch->flags &= ~TCQ_F_CAN_BYPASS;
  	return 0;
  }
  
  static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	struct nlattr *opts;
  
  	opts = nla_nest_start(skb, TCA_OPTIONS);
  	if (opts == NULL)
  		goto nla_put_failure;
  
  	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
  			codel_time_to_us(q->cparams.target)) ||
  	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
  			sch->limit) ||
  	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
  			codel_time_to_us(q->cparams.interval)) ||
  	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
  			q->cparams.ecn) ||
  	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
  			q->quantum) ||
9d18562a2   Eric Dumazet   fq_codel: add bat...
506
507
  	    nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
  			q->drop_batch_size) ||
95b58430a   Eric Dumazet   fq_codel: add mem...
508
509
  	    nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
  			q->memory_limit) ||
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
510
511
512
  	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
  			q->flows_cnt))
  		goto nla_put_failure;
80ba92fa1   Eric Dumazet   codel: add ce_thr...
513
514
515
516
  	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
  	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
  			codel_time_to_us(q->cparams.ce_threshold)))
  		goto nla_put_failure;
d59b7d805   Yang Yingliang   net_sched: return...
517
  	return nla_nest_end(skb, opts);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
518
519
520
521
522
523
524
525
526
527
  
  nla_put_failure:
  	return -1;
  }
  
  static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	struct tc_fq_codel_xstats st = {
  		.type				= TCA_FQ_CODEL_XSTATS_QDISC,
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
528
529
  	};
  	struct list_head *pos;
669d67bf7   Sasha Levin   net: codel: fix b...
530
531
532
533
  	st.qdisc_stats.maxpacket = q->cstats.maxpacket;
  	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
  	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
  	st.qdisc_stats.new_flow_count = q->new_flow_count;
80ba92fa1   Eric Dumazet   codel: add ce_thr...
534
  	st.qdisc_stats.ce_mark = q->cstats.ce_mark;
95b58430a   Eric Dumazet   fq_codel: add mem...
535
536
  	st.qdisc_stats.memory_usage  = q->memory_usage;
  	st.qdisc_stats.drop_overmemory = q->drop_overmemory;
669d67bf7   Sasha Levin   net: codel: fix b...
537

edb09eb17   Eric Dumazet   net: sched: do no...
538
  	sch_tree_lock(sch);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
539
540
541
542
543
  	list_for_each(pos, &q->new_flows)
  		st.qdisc_stats.new_flows_len++;
  
  	list_for_each(pos, &q->old_flows)
  		st.qdisc_stats.old_flows_len++;
edb09eb17   Eric Dumazet   net: sched: do no...
544
  	sch_tree_unlock(sch);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
545
546
547
548
549
550
551
552
  
  	return gnet_stats_copy_app(d, &st, sizeof(st));
  }
  
  static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
  {
  	return NULL;
  }
143976ce9   WANG Cong   net_sched: remove...
553
  static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
554
555
556
557
558
559
560
561
562
563
564
  {
  	return 0;
  }
  
  static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
  			      u32 classid)
  {
  	/* we cannot bypass queue discipline anymore */
  	sch->flags &= ~TCQ_F_CAN_BYPASS;
  	return 0;
  }
143976ce9   WANG Cong   net_sched: remove...
565
  static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
566
567
  {
  }
6529eaba3   Jiri Pirko   net: sched: intro...
568
  static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
569
570
571
572
573
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  
  	if (cl)
  		return NULL;
6529eaba3   Jiri Pirko   net: sched: intro...
574
  	return q->block;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
  }
  
  static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
  			  struct sk_buff *skb, struct tcmsg *tcm)
  {
  	tcm->tcm_handle |= TC_H_MIN(cl);
  	return 0;
  }
  
  static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  				     struct gnet_dump *d)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	u32 idx = cl - 1;
  	struct gnet_stats_queue qs = { 0 };
  	struct tc_fq_codel_xstats xstats;
  
  	if (idx < q->flows_cnt) {
  		const struct fq_codel_flow *flow = &q->flows[idx];
edb09eb17   Eric Dumazet   net: sched: do no...
594
  		const struct sk_buff *skb;
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
  
  		memset(&xstats, 0, sizeof(xstats));
  		xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
  		xstats.class_stats.deficit = flow->deficit;
  		xstats.class_stats.ldelay =
  			codel_time_to_us(flow->cvars.ldelay);
  		xstats.class_stats.count = flow->cvars.count;
  		xstats.class_stats.lastcount = flow->cvars.lastcount;
  		xstats.class_stats.dropping = flow->cvars.dropping;
  		if (flow->cvars.dropping) {
  			codel_tdiff_t delta = flow->cvars.drop_next -
  					      codel_get_time();
  
  			xstats.class_stats.drop_next = (delta >= 0) ?
  				codel_time_to_us(delta) :
  				-codel_time_to_us(-delta);
  		}
edb09eb17   Eric Dumazet   net: sched: do no...
612
613
614
615
616
617
618
619
  		if (flow->head) {
  			sch_tree_lock(sch);
  			skb = flow->head;
  			while (skb) {
  				qs.qlen++;
  				skb = skb->next;
  			}
  			sch_tree_unlock(sch);
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
620
621
622
623
  		}
  		qs.backlog = q->backlogs[idx];
  		qs.drops = flow->dropped;
  	}
aafddbf0c   Eric Dumazet   fq_codel: return ...
624
  	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
  		return -1;
  	if (idx < q->flows_cnt)
  		return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
  	return 0;
  }
  
  static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  {
  	struct fq_codel_sched_data *q = qdisc_priv(sch);
  	unsigned int i;
  
  	if (arg->stop)
  		return;
  
  	for (i = 0; i < q->flows_cnt; i++) {
  		if (list_empty(&q->flows[i].flowchain) ||
  		    arg->count < arg->skip) {
  			arg->count++;
  			continue;
  		}
  		if (arg->fn(sch, i + 1, arg) < 0) {
  			arg->stop = 1;
  			break;
  		}
  		arg->count++;
  	}
  }
  
  static const struct Qdisc_class_ops fq_codel_class_ops = {
  	.leaf		=	fq_codel_leaf,
143976ce9   WANG Cong   net_sched: remove...
655
  	.find		=	fq_codel_find,
6529eaba3   Jiri Pirko   net: sched: intro...
656
  	.tcf_block	=	fq_codel_tcf_block,
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
657
  	.bind_tcf	=	fq_codel_bind,
143976ce9   WANG Cong   net_sched: remove...
658
  	.unbind_tcf	=	fq_codel_unbind,
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
659
660
661
662
663
664
665
666
667
668
669
670
  	.dump		=	fq_codel_dump_class,
  	.dump_stats	=	fq_codel_dump_class_stats,
  	.walk		=	fq_codel_walk,
  };
  
  static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
  	.cl_ops		=	&fq_codel_class_ops,
  	.id		=	"fq_codel",
  	.priv_size	=	sizeof(struct fq_codel_sched_data),
  	.enqueue	=	fq_codel_enqueue,
  	.dequeue	=	fq_codel_dequeue,
  	.peek		=	qdisc_peek_dequeued,
4b549a2ef   Eric Dumazet   fq_codel: Fair Qu...
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
  	.init		=	fq_codel_init,
  	.reset		=	fq_codel_reset,
  	.destroy	=	fq_codel_destroy,
  	.change		=	fq_codel_change,
  	.dump		=	fq_codel_dump,
  	.dump_stats =	fq_codel_dump_stats,
  	.owner		=	THIS_MODULE,
  };
  
  static int __init fq_codel_module_init(void)
  {
  	return register_qdisc(&fq_codel_qdisc_ops);
  }
  
  static void __exit fq_codel_module_exit(void)
  {
  	unregister_qdisc(&fq_codel_qdisc_ops);
  }
  
  module_init(fq_codel_module_init)
  module_exit(fq_codel_module_exit)
  MODULE_AUTHOR("Eric Dumazet");
  MODULE_LICENSE("GPL");