Blame view

net/sched/sch_sfq.c 22.5 KB
2874c5fd2   Thomas Gleixner   treewide: Replace...
1
  // SPDX-License-Identifier: GPL-2.0-or-later
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
  /*
   * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
   *
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
5
6
   * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
7
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
9
10
11
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/jiffies.h>
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
  #include <linux/in.h>
  #include <linux/errno.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
  #include <linux/init.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
15
  #include <linux/skbuff.h>
55667441c   Eric Dumazet   net/flow_dissecto...
16
  #include <linux/siphash.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
17
  #include <linux/slab.h>
817fb15df   Eric Dumazet   net_sched: sfq: a...
18
  #include <linux/vmalloc.h>
0ba480538   Patrick McHardy   [NET_SCHED]: Remo...
19
  #include <net/netlink.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <net/pkt_sched.h>
cf1facda2   Jiri Pirko   sched: move tcf_p...
21
  #include <net/pkt_cls.h>
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
22
  #include <net/red.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
  
  
  /*	Stochastic Fairness Queuing algorithm.
  	=======================================
  
  	Source:
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
  
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	"Interworking: Research and Experience", v.2, 1991, p.113-131.
  
  
  	See also:
  	M. Shreedhar and George Varghese "Efficient Fair
  	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
39
  	This is not the thing that is usually called (W)FQ nowadays.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
40
41
42
43
44
45
46
47
  	It does not use any timestamp mechanism, but instead
  	processes queues in round-robin order.
  
  	ADVANTAGE:
  
  	- It is very cheap. Both CPU and memory requirements are minimal.
  
  	DRAWBACKS:
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
48
  	- "Stochastic" -> It is not 100% fair.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
49
50
51
52
53
54
55
56
57
58
59
60
61
  	When hash collisions occur, several flows are considered as one.
  
  	- "Round-robin" -> It introduces larger delays than virtual clock
  	based schemes, and should not be used for isolating interactive
  	traffic	from non-interactive. It means, that this scheduler
  	should be used as leaf of CBQ or P3, which put interactive traffic
  	to higher priority band.
  
  	We still need true WFQ for top level CSZ, but using WFQ
  	for the best effort traffic is absolutely pointless:
  	SFQ is superior for this purpose.
  
  	IMPLEMENTATION:
18cb80985   Eric Dumazet   net_sched: sfq: e...
62
63
64
65
66
  	This implementation limits :
  	- maximal queue length per flow to 127 packets.
  	- max mtu to 2^18-1;
  	- max 65408 flows,
  	- number of hash buckets to 65536.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
67
68
  
  	It is easy to increase these values, but not in flight.  */
18cb80985   Eric Dumazet   net_sched: sfq: e...
69
70
71
72
  #define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
  #define SFQ_DEFAULT_FLOWS	128
  #define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
  #define SFQ_EMPTY_SLOT		0xffff
817fb15df   Eric Dumazet   net_sched: sfq: a...
73
  #define SFQ_DEFAULT_HASH_DIVISOR 1024
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
74
75
76
77
78
  /* We use 16 bits to store allot, and want to handle packets up to 64K
   * Scale allot by 8 (1<<3) so that no overflow occurs.
   */
  #define SFQ_ALLOT_SHIFT		3
  #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
79

18cb80985   Eric Dumazet   net_sched: sfq: e...
80
81
  /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
  typedef u16 sfq_index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
83
84
  /*
   * We dont use pointers to save space.
18cb80985   Eric Dumazet   net_sched: sfq: e...
85
86
   * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
   * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
87
88
   * are 'pointers' to dep[] array
   */
cc7ec456f   Eric Dumazet   net_sched: cleanups
89
  struct sfq_head {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
90
91
92
  	sfq_index	next;
  	sfq_index	prev;
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
93
94
95
96
  struct sfq_slot {
  	struct sk_buff	*skblist_next;
  	struct sk_buff	*skblist_prev;
  	sfq_index	qlen; /* number of skbs in skblist */
18cb80985   Eric Dumazet   net_sched: sfq: e...
97
  	sfq_index	next; /* next slot in sfq RR chain */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
98
99
100
  	struct sfq_head dep; /* anchor in dep[] chains */
  	unsigned short	hash; /* hash value (index in ht[]) */
  	short		allot; /* credit for this slot */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
101
102
103
  
  	unsigned int    backlog;
  	struct red_vars vars;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
104
  };
cc7ec456f   Eric Dumazet   net_sched: cleanups
105
  struct sfq_sched_data {
18cb80985   Eric Dumazet   net_sched: sfq: e...
106
107
  /* frequently used fields */
  	int		limit;		/* limit of total number of packets in this qdisc */
817fb15df   Eric Dumazet   net_sched: sfq: a...
108
  	unsigned int	divisor;	/* number of slots in hash table */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
109
110
  	u8		headdrop;
  	u8		maxdepth;	/* limit of packets per flow */
18cb80985   Eric Dumazet   net_sched: sfq: e...
111

55667441c   Eric Dumazet   net/flow_dissecto...
112
  	siphash_key_t 	perturbation;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
113
114
  	u8		cur_depth;	/* depth of longest slot */
  	u8		flags;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
115
  	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
116
  	struct tcf_proto __rcu *filter_list;
6529eaba3   Jiri Pirko   net: sched: intro...
117
  	struct tcf_block *block;
18cb80985   Eric Dumazet   net_sched: sfq: e...
118
119
  	sfq_index	*ht;		/* Hash table ('divisor' slots) */
  	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
120
121
122
  	struct red_parms *red_parms;
  	struct tc_sfqred_stats stats;
  	struct sfq_slot *tail;		/* current slot in round */
18cb80985   Eric Dumazet   net_sched: sfq: e...
123
124
125
126
127
128
  	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
  					/* Linked lists of slots, indexed by depth
  					 * dep[0] : list of unused flows
  					 * dep[1] : list of flows with 1 packet
  					 * dep[X] : list of flows with X packets
  					 */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
129
  	unsigned int	maxflows;	/* number of flows in flows array */
18cb80985   Eric Dumazet   net_sched: sfq: e...
130
131
132
  	int		perturb_period;
  	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
  	struct timer_list perturb_timer;
cdeabbb88   Kees Cook   net: sched: Conve...
133
  	struct Qdisc	*sch;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
135
136
137
138
139
  /*
   * sfq_head are either in a sfq_slot or in dep[] array
   */
  static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
140
  	if (val < SFQ_MAX_FLOWS)
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
141
  		return &q->slots[val].dep;
18cb80985   Eric Dumazet   net_sched: sfq: e...
142
  	return &q->dep[val - SFQ_MAX_FLOWS];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
143
  }
11fca931d   Eric Dumazet   sch_sfq: use skb_...
144
145
  static unsigned int sfq_hash(const struct sfq_sched_data *q,
  			     const struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
146
  {
55667441c   Eric Dumazet   net/flow_dissecto...
147
  	return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
148
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
149
150
151
152
153
  static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
  				 int *qerr)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct tcf_result res;
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
154
  	struct tcf_proto *fl;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
155
156
157
158
  	int result;
  
  	if (TC_H_MAJ(skb->priority) == sch->handle &&
  	    TC_H_MIN(skb->priority) > 0 &&
817fb15df   Eric Dumazet   net_sched: sfq: a...
159
  	    TC_H_MIN(skb->priority) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
160
  		return TC_H_MIN(skb->priority);
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
161
  	fl = rcu_dereference_bh(q->filter_list);
ada1dba04   Tom Herbert   sched: Call skb_g...
162
  	if (!fl)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
163
  		return sfq_hash(q, skb) + 1;
c27f339af   Jarek Poplawski   net_sched: Add qd...
164
  	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
87d83093b   Jiri Pirko   net: sched: move ...
165
  	result = tcf_classify(skb, fl, &res, false);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
166
167
168
169
170
  	if (result >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
  		switch (result) {
  		case TC_ACT_STOLEN:
  		case TC_ACT_QUEUED:
e25ea21ff   Jiri Pirko   net: sched: intro...
171
  		case TC_ACT_TRAP:
378a2f090   Jarek Poplawski   net_sched: Add qd...
172
  			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
f3ae608ed   Gustavo A. R. Silva   net: sched: mark ...
173
  			/* fall through */
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
174
175
176
177
  		case TC_ACT_SHOT:
  			return 0;
  		}
  #endif
817fb15df   Eric Dumazet   net_sched: sfq: a...
178
  		if (TC_H_MIN(res.classid) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
179
180
181
182
  			return TC_H_MIN(res.classid);
  	}
  	return 0;
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
183
  /*
18cb80985   Eric Dumazet   net_sched: sfq: e...
184
   * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
185
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
186
187
188
  static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
18cb80985   Eric Dumazet   net_sched: sfq: e...
189
190
  	struct sfq_slot *slot = &q->slots[x];
  	int qlen = slot->qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
191

18cb80985   Eric Dumazet   net_sched: sfq: e...
192
  	p = qlen + SFQ_MAX_FLOWS;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
193
  	n = q->dep[qlen].next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194

18cb80985   Eric Dumazet   net_sched: sfq: e...
195
196
  	slot->dep.next = n;
  	slot->dep.prev = p;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
197
198
199
  
  	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
  	sfq_dep_head(q, n)->prev = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
200
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
201
  #define sfq_unlink(q, x, n, p)			\
fa08943b9   Yang Yingliang   net_sched: sfq: p...
202
203
204
205
206
207
  	do {					\
  		n = q->slots[x].dep.next;	\
  		p = q->slots[x].dep.prev;	\
  		sfq_dep_head(q, p)->next = n;	\
  		sfq_dep_head(q, n)->prev = p;	\
  	} while (0)
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
208

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
209
210
211
  static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
212
  	int d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
213

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
214
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
215

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
216
217
218
  	d = q->slots[x].qlen--;
  	if (n == p && q->cur_depth == d)
  		q->cur_depth--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
219
220
221
222
223
224
225
  	sfq_link(q, x);
  }
  
  static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
  	int d;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
226
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
227

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
228
229
230
  	d = ++q->slots[x].qlen;
  	if (q->cur_depth < d)
  		q->cur_depth = d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
232
  	sfq_link(q, x);
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
233
234
235
236
237
238
239
240
  /* helper functions : might be changed when/if skb use a standard list_head */
  
  /* remove one skb from tail of slot queue */
  static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_prev;
  
  	slot->skblist_prev = skb->prev;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
241
  	skb->prev->next = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
242
243
244
245
246
247
248
249
250
251
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  /* remove one skb from head of slot queue */
  static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_next;
  
  	slot->skblist_next = skb->next;
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
252
  	skb->next->prev = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
253
254
255
256
257
258
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  static inline void slot_queue_init(struct sfq_slot *slot)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
259
  	memset(slot, 0, sizeof(*slot));
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
260
261
262
263
264
265
266
267
268
269
270
  	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
  }
  
  /* add skb to slot queue (tail add) */
  static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
  {
  	skb->prev = slot->skblist_prev;
  	skb->next = (struct sk_buff *)slot;
  	slot->skblist_prev->next = skb;
  	slot->skblist_prev = skb;
  }
f9ab7425b   Gao Feng   sched: sfq: drop ...
271
  static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
273
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
274
  	sfq_index x, d = q->cur_depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
276
  	struct sk_buff *skb;
  	unsigned int len;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
277
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
278

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
279
  	/* Queue is full! Find the longest slot and drop tail packet from it */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
  	if (d > 1) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
281
282
283
  		x = q->dep[d].next;
  		slot = &q->slots[x];
  drop:
18cb80985   Eric Dumazet   net_sched: sfq: e...
284
  		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
0abf77e55   Jussi Kivilinna   net_sched: Add ac...
285
  		len = qdisc_pkt_len(skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
286
  		slot->backlog -= len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
287
288
  		sfq_dec(q, x);
  		sch->q.qlen--;
25331d6ce   John Fastabend   net: sched: imple...
289
  		qdisc_qstats_backlog_dec(sch, skb);
f9ab7425b   Gao Feng   sched: sfq: drop ...
290
  		qdisc_drop(skb, sch, to_free);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
292
293
294
295
  		return len;
  	}
  
  	if (d == 1) {
  		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
296
297
298
299
300
  		x = q->tail->next;
  		slot = &q->slots[x];
  		q->tail->next = slot->next;
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		goto drop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
302
303
304
  	}
  
  	return 0;
  }
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
  /* Is ECN parameter configured */
  static int sfq_prob_mark(const struct sfq_sched_data *q)
  {
  	return q->flags & TC_RED_ECN;
  }
  
  /* Should packets over max threshold just be marked */
  static int sfq_hard_mark(const struct sfq_sched_data *q)
  {
  	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
  }
  
  static int sfq_headdrop(const struct sfq_sched_data *q)
  {
  	return q->headdrop;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
  static int
520ac30f4   Eric Dumazet   net_sched: drop p...
322
  sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
323
324
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
2ccccf5fb   WANG Cong   net_sched: update...
325
  	unsigned int hash, dropped;
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
326
  	sfq_index x, qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
327
  	struct sfq_slot *slot;
7f3ff4f63   Jarek Poplawski   pkt_sched: Annota...
328
  	int uninitialized_var(ret);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
329
330
  	struct sk_buff *head;
  	int delta;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
331
332
333
  
  	hash = sfq_classify(skb, sch, &ret);
  	if (hash == 0) {
c27f339af   Jarek Poplawski   net_sched: Add qd...
334
  		if (ret & __NET_XMIT_BYPASS)
25331d6ce   John Fastabend   net: sched: imple...
335
  			qdisc_qstats_drop(sch);
f9ab7425b   Gao Feng   sched: sfq: drop ...
336
  		__qdisc_drop(skb, to_free);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
337
338
339
  		return ret;
  	}
  	hash--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
341
  
  	x = q->ht[hash];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
342
343
344
  	slot = &q->slots[x];
  	if (x == SFQ_EMPTY_SLOT) {
  		x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
345
  		if (x >= SFQ_MAX_FLOWS)
520ac30f4   Eric Dumazet   net_sched: drop p...
346
  			return qdisc_drop(skb, sch, to_free);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
347
348
349
  		q->ht[hash] = x;
  		slot = &q->slots[x];
  		slot->hash = hash;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
  		slot->backlog = 0; /* should already be 0 anyway... */
  		red_set_vars(&slot->vars);
  		goto enqueue;
  	}
  	if (q->red_parms) {
  		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		switch (red_action(q->red_parms,
  				   &slot->vars,
  				   slot->vars.qavg)) {
  		case RED_DONT_MARK:
  			break;
  
  		case RED_PROB_MARK:
25331d6ce   John Fastabend   net: sched: imple...
365
  			qdisc_qstats_overlimit(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
  			if (sfq_prob_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.prob_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.prob_mark++;
  					break;
  				}
  			}
  			q->stats.prob_drop++;
  			goto congestion_drop;
  
  		case RED_HARD_MARK:
25331d6ce   John Fastabend   net: sched: imple...
382
  			qdisc_qstats_overlimit(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
  			if (sfq_hard_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.forced_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.forced_mark++;
  					break;
  				}
  			}
  			q->stats.forced_drop++;
  			goto congestion_drop;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
398
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
399

18cb80985   Eric Dumazet   net_sched: sfq: e...
400
  	if (slot->qlen >= q->maxdepth) {
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
401
402
  congestion_drop:
  		if (!sfq_headdrop(q))
520ac30f4   Eric Dumazet   net_sched: drop p...
403
  			return qdisc_drop(skb, sch, to_free);
18cb80985   Eric Dumazet   net_sched: sfq: e...
404

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
405
  		/* We know we have at least one packet in queue */
18cb80985   Eric Dumazet   net_sched: sfq: e...
406
  		head = slot_dequeue_head(slot);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
407
408
409
  		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
  		sch->qstats.backlog -= delta;
  		slot->backlog -= delta;
520ac30f4   Eric Dumazet   net_sched: drop p...
410
  		qdisc_drop(head, sch, to_free);
18cb80985   Eric Dumazet   net_sched: sfq: e...
411

18cb80985   Eric Dumazet   net_sched: sfq: e...
412
  		slot_queue_add(slot, skb);
325d5dc3f   Konstantin Khlebnikov   net_sched/sfq: up...
413
  		qdisc_tree_reduce_backlog(sch, 0, delta);
18cb80985   Eric Dumazet   net_sched: sfq: e...
414
415
  		return NET_XMIT_CN;
  	}
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
416

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
417
  enqueue:
25331d6ce   John Fastabend   net: sched: imple...
418
  	qdisc_qstats_backlog_inc(sch, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
419
  	slot->backlog += qdisc_pkt_len(skb);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
420
  	slot_queue_add(slot, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
  	sfq_inc(q, x);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
422
423
424
  	if (slot->qlen == 1) {		/* The flow is new */
  		if (q->tail == NULL) {	/* It is the first flow */
  			slot->next = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
425
  		} else {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
426
427
  			slot->next = q->tail->next;
  			q->tail->next = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
428
  		}
cc34eb672   Eric Dumazet   sch_sfq: revert d...
429
430
431
432
433
  		/* We put this flow at the end of our flow list.
  		 * This might sound unfair for a new flow to wait after old ones,
  		 * but we could endup servicing new flows only, and freeze old ones.
  		 */
  		q->tail = slot;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
434
  		/* We could use a bigger initial quantum for new flows */
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
435
  		slot->allot = q->scaled_quantum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
436
  	}
9190b3b32   Eric Dumazet   net_sched: accura...
437
  	if (++sch->q.qlen <= q->limit)
9871e50ed   Ben Greear   net: Use NET_XMIT...
438
  		return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
439

8efa88540   Eric Dumazet   sch_sfq: avoid gi...
440
  	qlen = slot->qlen;
f9ab7425b   Gao Feng   sched: sfq: drop ...
441
  	dropped = sfq_drop(sch, to_free);
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
442
443
444
  	/* Return Congestion Notification only if we dropped a packet
  	 * from this flow.
  	 */
325d5dc3f   Konstantin Khlebnikov   net_sched/sfq: up...
445
446
  	if (qlen != slot->qlen) {
  		qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
447
  		return NET_XMIT_CN;
325d5dc3f   Konstantin Khlebnikov   net_sched/sfq: up...
448
  	}
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
449
450
  
  	/* As we dropped a packet, better let upper stack know this */
2ccccf5fb   WANG Cong   net_sched: update...
451
  	qdisc_tree_reduce_backlog(sch, 1, dropped);
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
452
  	return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
  }
48a8f519e   Patrick McHardy   pkt_sched: Add ->...
454
  static struct sk_buff *
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
455
  sfq_dequeue(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
456
457
458
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct sk_buff *skb;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
459
  	sfq_index a, next_a;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
460
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
462
  
  	/* No active slots */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
463
  	if (q->tail == NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
464
  		return NULL;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
465
  next_slot:
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
466
467
  	a = q->tail->next;
  	slot = &q->slots[a];
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
468
469
470
471
472
  	if (slot->allot <= 0) {
  		q->tail = slot;
  		slot->allot += q->scaled_quantum;
  		goto next_slot;
  	}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
473
  	skb = slot_dequeue_head(slot);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
474
  	sfq_dec(q, a);
9190b3b32   Eric Dumazet   net_sched: accura...
475
  	qdisc_bstats_update(sch, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
  	sch->q.qlen--;
25331d6ce   John Fastabend   net: sched: imple...
477
  	qdisc_qstats_backlog_dec(sch, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
478
  	slot->backlog -= qdisc_pkt_len(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
479
  	/* Is the slot empty? */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
480
481
482
  	if (slot->qlen == 0) {
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		next_a = slot->next;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
483
  		if (a == next_a) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
484
  			q->tail = NULL; /* no more active slots */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
485
486
  			return skb;
  		}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
487
  		q->tail->next = next_a;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
488
489
  	} else {
  		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
490
491
492
493
494
  	}
  	return skb;
  }
  
  static void
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
495
  sfq_reset(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
496
497
498
499
  {
  	struct sk_buff *skb;
  
  	while ((skb = sfq_dequeue(sch)) != NULL)
fea024784   Eric Dumazet   net_sched: sch_fq...
500
  		rtnl_kfree_skbs(skb, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
501
  }
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
502
503
504
505
506
507
  /*
   * When q->perturbation is changed, we rehash all queued skbs
   * to avoid OOO (Out Of Order) effects.
   * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
   * counters.
   */
18cb80985   Eric Dumazet   net_sched: sfq: e...
508
  static void sfq_rehash(struct Qdisc *sch)
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
509
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
510
  	struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
511
512
513
514
  	struct sk_buff *skb;
  	int i;
  	struct sfq_slot *slot;
  	struct sk_buff_head list;
18cb80985   Eric Dumazet   net_sched: sfq: e...
515
  	int dropped = 0;
2ccccf5fb   WANG Cong   net_sched: update...
516
  	unsigned int drop_len = 0;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
517
518
  
  	__skb_queue_head_init(&list);
18cb80985   Eric Dumazet   net_sched: sfq: e...
519
  	for (i = 0; i < q->maxflows; i++) {
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
520
521
522
523
524
525
526
527
  		slot = &q->slots[i];
  		if (!slot->qlen)
  			continue;
  		while (slot->qlen) {
  			skb = slot_dequeue_head(slot);
  			sfq_dec(q, i);
  			__skb_queue_tail(&list, skb);
  		}
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
528
529
  		slot->backlog = 0;
  		red_set_vars(&slot->vars);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
530
531
532
533
534
535
536
537
538
539
540
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  	}
  	q->tail = NULL;
  
  	while ((skb = __skb_dequeue(&list)) != NULL) {
  		unsigned int hash = sfq_hash(q, skb);
  		sfq_index x = q->ht[hash];
  
  		slot = &q->slots[x];
  		if (x == SFQ_EMPTY_SLOT) {
  			x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
541
  			if (x >= SFQ_MAX_FLOWS) {
25331d6ce   John Fastabend   net: sched: imple...
542
543
  drop:
  				qdisc_qstats_backlog_dec(sch, skb);
2ccccf5fb   WANG Cong   net_sched: update...
544
  				drop_len += qdisc_pkt_len(skb);
18cb80985   Eric Dumazet   net_sched: sfq: e...
545
546
547
548
  				kfree_skb(skb);
  				dropped++;
  				continue;
  			}
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
549
550
551
552
  			q->ht[hash] = x;
  			slot = &q->slots[x];
  			slot->hash = hash;
  		}
18cb80985   Eric Dumazet   net_sched: sfq: e...
553
554
  		if (slot->qlen >= q->maxdepth)
  			goto drop;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
555
  		slot_queue_add(slot, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
556
557
558
559
560
  		if (q->red_parms)
  			slot->vars.qavg = red_calc_qavg(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		slot->backlog += qdisc_pkt_len(skb);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
561
562
563
564
565
566
567
568
569
570
571
572
  		sfq_inc(q, x);
  		if (slot->qlen == 1) {		/* The flow is new */
  			if (q->tail == NULL) {	/* It is the first flow */
  				slot->next = x;
  			} else {
  				slot->next = q->tail->next;
  				q->tail->next = x;
  			}
  			q->tail = slot;
  			slot->allot = q->scaled_quantum;
  		}
  	}
18cb80985   Eric Dumazet   net_sched: sfq: e...
573
  	sch->q.qlen -= dropped;
2ccccf5fb   WANG Cong   net_sched: update...
574
  	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
575
  }
cdeabbb88   Kees Cook   net: sched: Conve...
576
  static void sfq_perturbation(struct timer_list *t)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
577
  {
cdeabbb88   Kees Cook   net: sched: Conve...
578
579
  	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
  	struct Qdisc *sch = q->sch;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
580
  	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
55667441c   Eric Dumazet   net/flow_dissecto...
581
  	siphash_key_t nkey;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
582

55667441c   Eric Dumazet   net/flow_dissecto...
583
  	get_random_bytes(&nkey, sizeof(nkey));
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
584
  	spin_lock(root_lock);
55667441c   Eric Dumazet   net/flow_dissecto...
585
  	q->perturbation = nkey;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
586
  	if (!q->filter_list && q->tail)
18cb80985   Eric Dumazet   net_sched: sfq: e...
587
  		sfq_rehash(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
588
  	spin_unlock(root_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589

32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
590
591
  	if (q->perturb_period)
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
592
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
593
  static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
594
595
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
596
  	struct tc_sfq_qopt *ctl = nla_data(opt);
18cb80985   Eric Dumazet   net_sched: sfq: e...
597
  	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
2ccccf5fb   WANG Cong   net_sched: update...
598
  	unsigned int qlen, dropped = 0;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
599
  	struct red_parms *p = NULL;
f9ab7425b   Gao Feng   sched: sfq: drop ...
600
601
  	struct sk_buff *to_free = NULL;
  	struct sk_buff *tail = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
602

1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
603
  	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
604
  		return -EINVAL;
18cb80985   Eric Dumazet   net_sched: sfq: e...
605
606
  	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
  		ctl_v1 = nla_data(opt);
119b3d386   stephen hemminger   sfq: deadlock in ...
607
608
609
  	if (ctl->divisor &&
  	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
  		return -EINVAL;
f2d581951   Eric Dumazet   sch_sfq: validate...
610
611
612
613
614
615
616
617
  
  	/* slot->allot is a short, make sure quantum is not too big. */
  	if (ctl->quantum) {
  		unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
  
  		if (scaled <= 0 || scaled > SHRT_MAX)
  			return -EINVAL;
  	}
8afa10cbe   Nogah Frankel   net_sched: red: A...
618
619
620
  	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
  					ctl_v1->Wlog))
  		return -EINVAL;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
621
622
623
624
625
  	if (ctl_v1 && ctl_v1->qth_min) {
  		p = kmalloc(sizeof(*p), GFP_KERNEL);
  		if (!p)
  			return -ENOMEM;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
626
  	sch_tree_lock(sch);
18cb80985   Eric Dumazet   net_sched: sfq: e...
627
628
629
630
  	if (ctl->quantum) {
  		q->quantum = ctl->quantum;
  		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
631
  	q->perturb_period = ctl->perturb_period * HZ;
18cb80985   Eric Dumazet   net_sched: sfq: e...
632
633
634
  	if (ctl->flows)
  		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
  	if (ctl->divisor) {
817fb15df   Eric Dumazet   net_sched: sfq: a...
635
  		q->divisor = ctl->divisor;
18cb80985   Eric Dumazet   net_sched: sfq: e...
636
637
638
639
640
  		q->maxflows = min_t(u32, q->maxflows, q->divisor);
  	}
  	if (ctl_v1) {
  		if (ctl_v1->depth)
  			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
641
642
643
644
645
646
647
648
649
650
  		if (p) {
  			swap(q->red_parms, p);
  			red_set_parms(q->red_parms,
  				      ctl_v1->qth_min, ctl_v1->qth_max,
  				      ctl_v1->Wlog,
  				      ctl_v1->Plog, ctl_v1->Scell_log,
  				      NULL,
  				      ctl_v1->max_P);
  		}
  		q->flags = ctl_v1->flags;
18cb80985   Eric Dumazet   net_sched: sfq: e...
651
652
653
654
655
656
  		q->headdrop = ctl_v1->headdrop;
  	}
  	if (ctl->limit) {
  		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
  		q->maxflows = min_t(u32, q->maxflows, q->limit);
  	}
5e50da01d   Patrick McHardy   [NET_SCHED]: Fix ...
657
  	qlen = sch->q.qlen;
f9ab7425b   Gao Feng   sched: sfq: drop ...
658
659
660
661
662
663
664
  	while (sch->q.qlen > q->limit) {
  		dropped += sfq_drop(sch, &to_free);
  		if (!tail)
  			tail = to_free;
  	}
  
  	rtnl_kfree_skbs(to_free, tail);
2ccccf5fb   WANG Cong   net_sched: update...
665
  	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
666
667
668
  
  	del_timer(&q->perturb_timer);
  	if (q->perturb_period) {
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
669
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
55667441c   Eric Dumazet   net/flow_dissecto...
670
  		get_random_bytes(&q->perturbation, sizeof(q->perturbation));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
671
672
  	}
  	sch_tree_unlock(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
673
  	kfree(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
674
675
  	return 0;
  }
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
676
677
  static void *sfq_alloc(size_t sz)
  {
752ade68c   Michal Hocko   treewide: use kv[...
678
  	return  kvmalloc(sz, GFP_KERNEL);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
679
680
681
682
  }
  
  static void sfq_free(void *addr)
  {
4cb28970a   WANG Cong   net: use the new ...
683
  	kvfree(addr);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
684
685
686
687
688
  }
  
  static void sfq_destroy(struct Qdisc *sch)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
6529eaba3   Jiri Pirko   net: sched: intro...
689
  	tcf_block_put(q->block);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
690
691
692
  	q->perturb_period = 0;
  	del_timer_sync(&q->perturb_timer);
  	sfq_free(q->ht);
18cb80985   Eric Dumazet   net_sched: sfq: e...
693
  	sfq_free(q->slots);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
694
  	kfree(q->red_parms);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
695
  }
e63d7dfd2   Alexander Aring   net: sched: sch: ...
696
697
  static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
  		    struct netlink_ext_ack *extack)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
698
699
700
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	int i;
6529eaba3   Jiri Pirko   net: sched: intro...
701
  	int err;
f85729d07   Paolo Abeni   sch_sfq: fix null...
702
  	q->sch = sch;
cdeabbb88   Kees Cook   net: sched: Conve...
703
  	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
e23265766   Nikolay Aleksandrov   sch_sfq: fix null...
704

8d1a77f97   Alexander Aring   net: sch: api: ad...
705
  	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
6529eaba3   Jiri Pirko   net: sched: intro...
706
707
  	if (err)
  		return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708

18cb80985   Eric Dumazet   net_sched: sfq: e...
709
710
711
  	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
  		q->dep[i].next = i + SFQ_MAX_FLOWS;
  		q->dep[i].prev = i + SFQ_MAX_FLOWS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
712
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
713

18cb80985   Eric Dumazet   net_sched: sfq: e...
714
715
  	q->limit = SFQ_MAX_DEPTH;
  	q->maxdepth = SFQ_MAX_DEPTH;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
716
717
  	q->cur_depth = 0;
  	q->tail = NULL;
817fb15df   Eric Dumazet   net_sched: sfq: a...
718
  	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
18cb80985   Eric Dumazet   net_sched: sfq: e...
719
  	q->maxflows = SFQ_DEFAULT_FLOWS;
02a9098ed   Eric Dumazet   net_sched: sfq: a...
720
721
722
  	q->quantum = psched_mtu(qdisc_dev(sch));
  	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	q->perturb_period = 0;
55667441c   Eric Dumazet   net/flow_dissecto...
723
  	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
02a9098ed   Eric Dumazet   net_sched: sfq: a...
724
725
  
  	if (opt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
726
727
728
729
  		int err = sfq_change(sch, opt);
  		if (err)
  			return err;
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
730

bd16a6cce   Eric Dumazet   net_sched: sfq: f...
731
  	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
18cb80985   Eric Dumazet   net_sched: sfq: e...
732
733
  	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
  	if (!q->ht || !q->slots) {
87b60cfac   Eric Dumazet   net_sched: fix er...
734
  		/* Note: sfq_destroy() will be called by our caller */
817fb15df   Eric Dumazet   net_sched: sfq: a...
735
  		return -ENOMEM;
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
736
  	}
87b60cfac   Eric Dumazet   net_sched: fix er...
737

817fb15df   Eric Dumazet   net_sched: sfq: a...
738
739
  	for (i = 0; i < q->divisor; i++)
  		q->ht[i] = SFQ_EMPTY_SLOT;
18cb80985   Eric Dumazet   net_sched: sfq: e...
740
  	for (i = 0; i < q->maxflows; i++) {
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
741
  		slot_queue_init(&q->slots[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
742
  		sfq_link(q, i);
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
743
  	}
23624935e   Eric Dumazet   net_sched: TCQ_F_...
744
745
746
747
  	if (q->limit >= 1)
  		sch->flags |= TCQ_F_CAN_BYPASS;
  	else
  		sch->flags &= ~TCQ_F_CAN_BYPASS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
748
749
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
750
751
752
  static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
27a884dc3   Arnaldo Carvalho de Melo   [SK_BUFF]: Conver...
753
  	unsigned char *b = skb_tail_pointer(skb);
18cb80985   Eric Dumazet   net_sched: sfq: e...
754
  	struct tc_sfq_qopt_v1 opt;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
755
  	struct red_parms *p = q->red_parms;
18cb80985   Eric Dumazet   net_sched: sfq: e...
756
757
758
759
760
761
762
763
764
  
  	memset(&opt, 0, sizeof(opt));
  	opt.v0.quantum	= q->quantum;
  	opt.v0.perturb_period = q->perturb_period / HZ;
  	opt.v0.limit	= q->limit;
  	opt.v0.divisor	= q->divisor;
  	opt.v0.flows	= q->maxflows;
  	opt.depth	= q->maxdepth;
  	opt.headdrop	= q->headdrop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
765

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
766
767
768
769
770
771
772
773
774
775
  	if (p) {
  		opt.qth_min	= p->qth_min >> p->Wlog;
  		opt.qth_max	= p->qth_max >> p->Wlog;
  		opt.Wlog	= p->Wlog;
  		opt.Plog	= p->Plog;
  		opt.Scell_log	= p->Scell_log;
  		opt.max_P	= p->max_P;
  	}
  	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
  	opt.flags	= q->flags;
1b34ec43c   David S. Miller   pkt_sched: Stop u...
776
777
  	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
  		goto nla_put_failure;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
778
779
  
  	return skb->len;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
780
  nla_put_failure:
dc5fc579b   Arnaldo Carvalho de Melo   [NETLINK]: Use nl...
781
  	nlmsg_trim(skb, b);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
782
783
  	return -1;
  }
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
784
785
786
787
  static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
  {
  	return NULL;
  }
143976ce9   WANG Cong   net_sched: remove...
788
  static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
789
790
791
  {
  	return 0;
  }
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
792
793
794
795
796
  static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
  			      u32 classid)
  {
  	return 0;
  }
143976ce9   WANG Cong   net_sched: remove...
797
  static void sfq_unbind(struct Qdisc *q, unsigned long cl)
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
798
799
  {
  }
cbaacc4e8   Alexander Aring   net: sched: sch: ...
800
801
  static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
  				       struct netlink_ext_ack *extack)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
802
803
804
805
806
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  
  	if (cl)
  		return NULL;
6529eaba3   Jiri Pirko   net: sched: intro...
807
  	return q->block;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
808
  }
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
809
810
811
812
813
814
815
816
817
818
819
  static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
  			  struct sk_buff *skb, struct tcmsg *tcm)
  {
  	tcm->tcm_handle |= TC_H_MIN(cl);
  	return 0;
  }
  
  static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  				struct gnet_dump *d)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
820
821
822
  	sfq_index idx = q->ht[cl - 1];
  	struct gnet_stats_queue qs = { 0 };
  	struct tc_sfq_xstats xstats = { 0 };
c42662632   Eric Dumazet   net_sched: sch_sf...
823

ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
824
825
  	if (idx != SFQ_EMPTY_SLOT) {
  		const struct sfq_slot *slot = &q->slots[idx];
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
826

eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
827
  		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
828
  		qs.qlen = slot->qlen;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
829
  		qs.backlog = slot->backlog;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
830
  	}
b0ab6f927   John Fastabend   net: sched: enabl...
831
  	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
832
833
834
  		return -1;
  	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
835
836
  static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  {
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
837
838
839
840
841
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	unsigned int i;
  
  	if (arg->stop)
  		return;
817fb15df   Eric Dumazet   net_sched: sfq: a...
842
  	for (i = 0; i < q->divisor; i++) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
843
  		if (q->ht[i] == SFQ_EMPTY_SLOT ||
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
844
845
846
847
848
849
850
851
852
853
  		    arg->count < arg->skip) {
  			arg->count++;
  			continue;
  		}
  		if (arg->fn(sch, i + 1, arg) < 0) {
  			arg->stop = 1;
  			break;
  		}
  		arg->count++;
  	}
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
854
855
856
  }
  
  static const struct Qdisc_class_ops sfq_class_ops = {
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
857
  	.leaf		=	sfq_leaf,
143976ce9   WANG Cong   net_sched: remove...
858
  	.find		=	sfq_find,
6529eaba3   Jiri Pirko   net: sched: intro...
859
  	.tcf_block	=	sfq_tcf_block,
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
860
  	.bind_tcf	=	sfq_bind,
143976ce9   WANG Cong   net_sched: remove...
861
  	.unbind_tcf	=	sfq_unbind,
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
862
863
  	.dump		=	sfq_dump_class,
  	.dump_stats	=	sfq_dump_class_stats,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
864
865
  	.walk		=	sfq_walk,
  };
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
866
  static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
867
  	.cl_ops		=	&sfq_class_ops,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
868
869
870
871
  	.id		=	"sfq",
  	.priv_size	=	sizeof(struct sfq_sched_data),
  	.enqueue	=	sfq_enqueue,
  	.dequeue	=	sfq_dequeue,
07bd8df5d   Eric Dumazet   sch_sfq: fix peek...
872
  	.peek		=	qdisc_peek_dequeued,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
873
874
875
876
877
878
879
880
881
882
883
884
  	.init		=	sfq_init,
  	.reset		=	sfq_reset,
  	.destroy	=	sfq_destroy,
  	.change		=	NULL,
  	.dump		=	sfq_dump,
  	.owner		=	THIS_MODULE,
  };
  
  static int __init sfq_module_init(void)
  {
  	return register_qdisc(&sfq_qdisc_ops);
  }
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
885
  static void __exit sfq_module_exit(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
886
887
888
889
890
891
  {
  	unregister_qdisc(&sfq_qdisc_ops);
  }
  module_init(sfq_module_init)
  module_exit(sfq_module_exit)
  MODULE_LICENSE("GPL");