Blame view

net/sched/sch_sfq.c 21.9 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
   *
   *		This program is free software; you can redistribute it and/or
   *		modify it under the terms of the GNU General Public License
   *		as published by the Free Software Foundation; either version
   *		2 of the License, or (at your option) any later version.
   *
   * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/jiffies.h>
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
  #include <linux/in.h>
  #include <linux/errno.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/init.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/skbuff.h>
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
20
  #include <linux/jhash.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
21
  #include <linux/slab.h>
817fb15df   Eric Dumazet   net_sched: sfq: a...
22
  #include <linux/vmalloc.h>
0ba480538   Patrick McHardy   [NET_SCHED]: Remo...
23
  #include <net/netlink.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
  #include <net/pkt_sched.h>
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
25
  #include <net/red.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  
  
  /*	Stochastic Fairness Queuing algorithm.
  	=======================================
  
  	Source:
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
  
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	"Interworking: Research and Experience", v.2, 1991, p.113-131.
  
  
  	See also:
  	M. Shreedhar and George Varghese "Efficient Fair
  	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
42
  	This is not the thing that is usually called (W)FQ nowadays.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
43
44
45
46
47
48
49
50
  	It does not use any timestamp mechanism, but instead
  	processes queues in round-robin order.
  
  	ADVANTAGE:
  
  	- It is very cheap. Both CPU and memory requirements are minimal.
  
  	DRAWBACKS:
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
51
  	- "Stochastic" -> It is not 100% fair.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
52
53
54
55
56
57
58
59
60
61
62
63
64
  	When hash collisions occur, several flows are considered as one.
  
  	- "Round-robin" -> It introduces larger delays than virtual clock
  	based schemes, and should not be used for isolating interactive
  	traffic	from non-interactive. It means, that this scheduler
  	should be used as leaf of CBQ or P3, which put interactive traffic
  	to higher priority band.
  
  	We still need true WFQ for top level CSZ, but using WFQ
  	for the best effort traffic is absolutely pointless:
  	SFQ is superior for this purpose.
  
  	IMPLEMENTATION:
18cb80985   Eric Dumazet   net_sched: sfq: e...
65
66
67
68
69
  	This implementation limits :
  	- maximal queue length per flow to 127 packets.
  	- max mtu to 2^18-1;
  	- max 65408 flows,
  	- number of hash buckets to 65536.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
70
71
  
  	It is easy to increase these values, but not in flight.  */
18cb80985   Eric Dumazet   net_sched: sfq: e...
72
73
74
75
  #define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
  #define SFQ_DEFAULT_FLOWS	128
  #define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
  #define SFQ_EMPTY_SLOT		0xffff
817fb15df   Eric Dumazet   net_sched: sfq: a...
76
  #define SFQ_DEFAULT_HASH_DIVISOR 1024
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
77
78
79
80
81
  /* We use 16 bits to store allot, and want to handle packets up to 64K
   * Scale allot by 8 (1<<3) so that no overflow occurs.
   */
  #define SFQ_ALLOT_SHIFT		3
  #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
82

18cb80985   Eric Dumazet   net_sched: sfq: e...
83
84
  /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
  typedef u16 sfq_index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
85

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
86
87
  /*
   * We dont use pointers to save space.
18cb80985   Eric Dumazet   net_sched: sfq: e...
88
89
   * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
   * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
90
91
   * are 'pointers' to dep[] array
   */
cc7ec456f   Eric Dumazet   net_sched: cleanups
92
  struct sfq_head {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
93
94
95
  	sfq_index	next;
  	sfq_index	prev;
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
96
97
98
99
  struct sfq_slot {
  	struct sk_buff	*skblist_next;
  	struct sk_buff	*skblist_prev;
  	sfq_index	qlen; /* number of skbs in skblist */
18cb80985   Eric Dumazet   net_sched: sfq: e...
100
  	sfq_index	next; /* next slot in sfq RR chain */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
101
102
103
  	struct sfq_head dep; /* anchor in dep[] chains */
  	unsigned short	hash; /* hash value (index in ht[]) */
  	short		allot; /* credit for this slot */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
104
105
106
  
  	unsigned int    backlog;
  	struct red_vars vars;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
107
  };
cc7ec456f   Eric Dumazet   net_sched: cleanups
108
  struct sfq_sched_data {
18cb80985   Eric Dumazet   net_sched: sfq: e...
109
110
  /* frequently used fields */
  	int		limit;		/* limit of total number of packets in this qdisc */
817fb15df   Eric Dumazet   net_sched: sfq: a...
111
  	unsigned int	divisor;	/* number of slots in hash table */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
112
113
  	u8		headdrop;
  	u8		maxdepth;	/* limit of packets per flow */
18cb80985   Eric Dumazet   net_sched: sfq: e...
114

32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
115
  	u32		perturbation;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
116
117
  	u8		cur_depth;	/* depth of longest slot */
  	u8		flags;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
118
  	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
119
  	struct tcf_proto __rcu *filter_list;
18cb80985   Eric Dumazet   net_sched: sfq: e...
120
121
  	sfq_index	*ht;		/* Hash table ('divisor' slots) */
  	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
122
123
124
  	struct red_parms *red_parms;
  	struct tc_sfqred_stats stats;
  	struct sfq_slot *tail;		/* current slot in round */
18cb80985   Eric Dumazet   net_sched: sfq: e...
125
126
127
128
129
130
  	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
  					/* Linked lists of slots, indexed by depth
  					 * dep[0] : list of unused flows
  					 * dep[1] : list of flows with 1 packet
  					 * dep[X] : list of flows with X packets
  					 */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
131
  	unsigned int	maxflows;	/* number of flows in flows array */
18cb80985   Eric Dumazet   net_sched: sfq: e...
132
133
134
  	int		perturb_period;
  	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
  	struct timer_list perturb_timer;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
135
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
136
137
138
139
140
  /*
   * sfq_head are either in a sfq_slot or in dep[] array
   */
  static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
141
  	if (val < SFQ_MAX_FLOWS)
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
142
  		return &q->slots[val].dep;
18cb80985   Eric Dumazet   net_sched: sfq: e...
143
  	return &q->dep[val - SFQ_MAX_FLOWS];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
144
  }
11fca931d   Eric Dumazet   sch_sfq: use skb_...
145
146
  static unsigned int sfq_hash(const struct sfq_sched_data *q,
  			     const struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
147
  {
ada1dba04   Tom Herbert   sched: Call skb_g...
148
  	return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
149
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
150
151
152
153
154
  static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
  				 int *qerr)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct tcf_result res;
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
155
  	struct tcf_proto *fl;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
156
157
158
159
  	int result;
  
  	if (TC_H_MAJ(skb->priority) == sch->handle &&
  	    TC_H_MIN(skb->priority) > 0 &&
817fb15df   Eric Dumazet   net_sched: sfq: a...
160
  	    TC_H_MIN(skb->priority) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
161
  		return TC_H_MIN(skb->priority);
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
162
  	fl = rcu_dereference_bh(q->filter_list);
ada1dba04   Tom Herbert   sched: Call skb_g...
163
  	if (!fl)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
164
  		return sfq_hash(q, skb) + 1;
c27f339af   Jarek Poplawski   net_sched: Add qd...
165
  	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
3b3ae8802   Daniel Borkmann   net: sched: conso...
166
  	result = tc_classify(skb, fl, &res, false);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
167
168
169
170
171
  	if (result >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
  		switch (result) {
  		case TC_ACT_STOLEN:
  		case TC_ACT_QUEUED:
378a2f090   Jarek Poplawski   net_sched: Add qd...
172
  			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
173
174
175
176
  		case TC_ACT_SHOT:
  			return 0;
  		}
  #endif
817fb15df   Eric Dumazet   net_sched: sfq: a...
177
  		if (TC_H_MIN(res.classid) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
178
179
180
181
  			return TC_H_MIN(res.classid);
  	}
  	return 0;
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
182
  /*
18cb80985   Eric Dumazet   net_sched: sfq: e...
183
   * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
184
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
185
186
187
  static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
18cb80985   Eric Dumazet   net_sched: sfq: e...
188
189
  	struct sfq_slot *slot = &q->slots[x];
  	int qlen = slot->qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
190

18cb80985   Eric Dumazet   net_sched: sfq: e...
191
  	p = qlen + SFQ_MAX_FLOWS;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
192
  	n = q->dep[qlen].next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193

18cb80985   Eric Dumazet   net_sched: sfq: e...
194
195
  	slot->dep.next = n;
  	slot->dep.prev = p;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
196
197
198
  
  	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
  	sfq_dep_head(q, n)->prev = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
199
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
200
  #define sfq_unlink(q, x, n, p)			\
fa08943b9   Yang Yingliang   net_sched: sfq: p...
201
202
203
204
205
206
  	do {					\
  		n = q->slots[x].dep.next;	\
  		p = q->slots[x].dep.prev;	\
  		sfq_dep_head(q, p)->next = n;	\
  		sfq_dep_head(q, n)->prev = p;	\
  	} while (0)
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
207

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
208
209
210
  static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
211
  	int d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
212

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
213
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
215
216
217
  	d = q->slots[x].qlen--;
  	if (n == p && q->cur_depth == d)
  		q->cur_depth--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
218
219
220
221
222
223
224
  	sfq_link(q, x);
  }
  
  static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
  	int d;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
225
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
227
228
229
  	d = ++q->slots[x].qlen;
  	if (q->cur_depth < d)
  		q->cur_depth = d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230
231
  	sfq_link(q, x);
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
232
233
234
235
236
237
238
239
  /* helper functions : might be changed when/if skb use a standard list_head */
  
  /* remove one skb from tail of slot queue */
  static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_prev;
  
  	slot->skblist_prev = skb->prev;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
240
  	skb->prev->next = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
241
242
243
244
245
246
247
248
249
250
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  /* remove one skb from head of slot queue */
  static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_next;
  
  	slot->skblist_next = skb->next;
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
251
  	skb->next->prev = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
252
253
254
255
256
257
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  static inline void slot_queue_init(struct sfq_slot *slot)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
258
  	memset(slot, 0, sizeof(*slot));
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
259
260
261
262
263
264
265
266
267
268
269
  	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
  }
  
  /* add skb to slot queue (tail add) */
  static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
  {
  	skb->prev = slot->skblist_prev;
  	skb->next = (struct sk_buff *)slot;
  	slot->skblist_prev->next = skb;
  	slot->skblist_prev = skb;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
270
271
272
  static unsigned int sfq_drop(struct Qdisc *sch)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
273
  	sfq_index x, d = q->cur_depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
274
275
  	struct sk_buff *skb;
  	unsigned int len;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
276
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
278
  	/* Queue is full! Find the longest slot and drop tail packet from it */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
279
  	if (d > 1) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
280
281
282
  		x = q->dep[d].next;
  		slot = &q->slots[x];
  drop:
18cb80985   Eric Dumazet   net_sched: sfq: e...
283
  		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
0abf77e55   Jussi Kivilinna   net_sched: Add ac...
284
  		len = qdisc_pkt_len(skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
285
  		slot->backlog -= len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
286
287
  		sfq_dec(q, x);
  		sch->q.qlen--;
25331d6ce   John Fastabend   net: sched: imple...
288
289
  		qdisc_qstats_drop(sch);
  		qdisc_qstats_backlog_dec(sch, skb);
e8d092aaf   WANG Cong   net_sched: fix a ...
290
  		kfree_skb(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
291
292
293
294
295
  		return len;
  	}
  
  	if (d == 1) {
  		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
296
297
298
299
300
  		x = q->tail->next;
  		slot = &q->slots[x];
  		q->tail->next = slot->next;
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		goto drop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
301
302
303
304
  	}
  
  	return 0;
  }
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
  /* Is ECN parameter configured */
  static int sfq_prob_mark(const struct sfq_sched_data *q)
  {
  	return q->flags & TC_RED_ECN;
  }
  
  /* Should packets over max threshold just be marked */
  static int sfq_hard_mark(const struct sfq_sched_data *q)
  {
  	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
  }
  
  static int sfq_headdrop(const struct sfq_sched_data *q)
  {
  	return q->headdrop;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
  static int
520ac30f4   Eric Dumazet   net_sched: drop p...
322
  sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
323
324
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
2ccccf5fb   WANG Cong   net_sched: update...
325
  	unsigned int hash, dropped;
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
326
  	sfq_index x, qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
327
  	struct sfq_slot *slot;
7f3ff4f63   Jarek Poplawski   pkt_sched: Annota...
328
  	int uninitialized_var(ret);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
329
330
  	struct sk_buff *head;
  	int delta;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
331
332
333
  
  	hash = sfq_classify(skb, sch, &ret);
  	if (hash == 0) {
c27f339af   Jarek Poplawski   net_sched: Add qd...
334
  		if (ret & __NET_XMIT_BYPASS)
25331d6ce   John Fastabend   net: sched: imple...
335
  			qdisc_qstats_drop(sch);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
336
337
338
339
  		kfree_skb(skb);
  		return ret;
  	}
  	hash--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
340
341
  
  	x = q->ht[hash];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
342
343
344
  	slot = &q->slots[x];
  	if (x == SFQ_EMPTY_SLOT) {
  		x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
345
  		if (x >= SFQ_MAX_FLOWS)
520ac30f4   Eric Dumazet   net_sched: drop p...
346
  			return qdisc_drop(skb, sch, to_free);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
347
348
349
  		q->ht[hash] = x;
  		slot = &q->slots[x];
  		slot->hash = hash;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
  		slot->backlog = 0; /* should already be 0 anyway... */
  		red_set_vars(&slot->vars);
  		goto enqueue;
  	}
  	if (q->red_parms) {
  		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		switch (red_action(q->red_parms,
  				   &slot->vars,
  				   slot->vars.qavg)) {
  		case RED_DONT_MARK:
  			break;
  
  		case RED_PROB_MARK:
25331d6ce   John Fastabend   net: sched: imple...
365
  			qdisc_qstats_overlimit(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
  			if (sfq_prob_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.prob_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.prob_mark++;
  					break;
  				}
  			}
  			q->stats.prob_drop++;
  			goto congestion_drop;
  
  		case RED_HARD_MARK:
25331d6ce   John Fastabend   net: sched: imple...
382
  			qdisc_qstats_overlimit(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
  			if (sfq_hard_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.forced_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.forced_mark++;
  					break;
  				}
  			}
  			q->stats.forced_drop++;
  			goto congestion_drop;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
398
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
399

18cb80985   Eric Dumazet   net_sched: sfq: e...
400
  	if (slot->qlen >= q->maxdepth) {
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
401
402
  congestion_drop:
  		if (!sfq_headdrop(q))
520ac30f4   Eric Dumazet   net_sched: drop p...
403
  			return qdisc_drop(skb, sch, to_free);
18cb80985   Eric Dumazet   net_sched: sfq: e...
404

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
405
  		/* We know we have at least one packet in queue */
18cb80985   Eric Dumazet   net_sched: sfq: e...
406
  		head = slot_dequeue_head(slot);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
407
408
409
  		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
  		sch->qstats.backlog -= delta;
  		slot->backlog -= delta;
520ac30f4   Eric Dumazet   net_sched: drop p...
410
  		qdisc_drop(head, sch, to_free);
18cb80985   Eric Dumazet   net_sched: sfq: e...
411

18cb80985   Eric Dumazet   net_sched: sfq: e...
412
413
414
  		slot_queue_add(slot, skb);
  		return NET_XMIT_CN;
  	}
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
415

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
416
  enqueue:
25331d6ce   John Fastabend   net: sched: imple...
417
  	qdisc_qstats_backlog_inc(sch, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
418
  	slot->backlog += qdisc_pkt_len(skb);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
419
  	slot_queue_add(slot, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
420
  	sfq_inc(q, x);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
421
422
423
  	if (slot->qlen == 1) {		/* The flow is new */
  		if (q->tail == NULL) {	/* It is the first flow */
  			slot->next = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
424
  		} else {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
425
426
  			slot->next = q->tail->next;
  			q->tail->next = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
427
  		}
cc34eb672   Eric Dumazet   sch_sfq: revert d...
428
429
430
431
432
  		/* We put this flow at the end of our flow list.
  		 * This might sound unfair for a new flow to wait after old ones,
  		 * but we could endup servicing new flows only, and freeze old ones.
  		 */
  		q->tail = slot;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
433
  		/* We could use a bigger initial quantum for new flows */
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
434
  		slot->allot = q->scaled_quantum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
435
  	}
9190b3b32   Eric Dumazet   net_sched: accura...
436
  	if (++sch->q.qlen <= q->limit)
9871e50ed   Ben Greear   net: Use NET_XMIT...
437
  		return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
438

8efa88540   Eric Dumazet   sch_sfq: avoid gi...
439
  	qlen = slot->qlen;
2ccccf5fb   WANG Cong   net_sched: update...
440
  	dropped = sfq_drop(sch);
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
441
442
443
  	/* Return Congestion Notification only if we dropped a packet
  	 * from this flow.
  	 */
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
444
445
446
447
  	if (qlen != slot->qlen)
  		return NET_XMIT_CN;
  
  	/* As we dropped a packet, better let upper stack know this */
2ccccf5fb   WANG Cong   net_sched: update...
448
  	qdisc_tree_reduce_backlog(sch, 1, dropped);
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
449
  	return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
450
  }
48a8f519e   Patrick McHardy   pkt_sched: Add ->...
451
  static struct sk_buff *
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
452
  sfq_dequeue(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
453
454
455
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct sk_buff *skb;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
456
  	sfq_index a, next_a;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
457
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
458
459
  
  	/* No active slots */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
460
  	if (q->tail == NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
461
  		return NULL;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
462
  next_slot:
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
463
464
  	a = q->tail->next;
  	slot = &q->slots[a];
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
465
466
467
468
469
  	if (slot->allot <= 0) {
  		q->tail = slot;
  		slot->allot += q->scaled_quantum;
  		goto next_slot;
  	}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
470
  	skb = slot_dequeue_head(slot);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
471
  	sfq_dec(q, a);
9190b3b32   Eric Dumazet   net_sched: accura...
472
  	qdisc_bstats_update(sch, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
473
  	sch->q.qlen--;
25331d6ce   John Fastabend   net: sched: imple...
474
  	qdisc_qstats_backlog_dec(sch, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
475
  	slot->backlog -= qdisc_pkt_len(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
476
  	/* Is the slot empty? */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
477
478
479
  	if (slot->qlen == 0) {
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		next_a = slot->next;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
480
  		if (a == next_a) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
481
  			q->tail = NULL; /* no more active slots */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
482
483
  			return skb;
  		}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
484
  		q->tail->next = next_a;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
485
486
  	} else {
  		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
487
488
489
490
491
  	}
  	return skb;
  }
  
  static void
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
492
  sfq_reset(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
493
494
495
496
  {
  	struct sk_buff *skb;
  
  	while ((skb = sfq_dequeue(sch)) != NULL)
fea024784   Eric Dumazet   net_sched: sch_fq...
497
  		rtnl_kfree_skbs(skb, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
498
  }
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
499
500
501
502
503
504
  /*
   * When q->perturbation is changed, we rehash all queued skbs
   * to avoid OOO (Out Of Order) effects.
   * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
   * counters.
   */
18cb80985   Eric Dumazet   net_sched: sfq: e...
505
  static void sfq_rehash(struct Qdisc *sch)
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
506
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
507
  	struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
508
509
510
511
  	struct sk_buff *skb;
  	int i;
  	struct sfq_slot *slot;
  	struct sk_buff_head list;
18cb80985   Eric Dumazet   net_sched: sfq: e...
512
  	int dropped = 0;
2ccccf5fb   WANG Cong   net_sched: update...
513
  	unsigned int drop_len = 0;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
514
515
  
  	__skb_queue_head_init(&list);
18cb80985   Eric Dumazet   net_sched: sfq: e...
516
  	for (i = 0; i < q->maxflows; i++) {
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
517
518
519
520
521
522
523
524
  		slot = &q->slots[i];
  		if (!slot->qlen)
  			continue;
  		while (slot->qlen) {
  			skb = slot_dequeue_head(slot);
  			sfq_dec(q, i);
  			__skb_queue_tail(&list, skb);
  		}
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
525
526
  		slot->backlog = 0;
  		red_set_vars(&slot->vars);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
527
528
529
530
531
532
533
534
535
536
537
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  	}
  	q->tail = NULL;
  
  	while ((skb = __skb_dequeue(&list)) != NULL) {
  		unsigned int hash = sfq_hash(q, skb);
  		sfq_index x = q->ht[hash];
  
  		slot = &q->slots[x];
  		if (x == SFQ_EMPTY_SLOT) {
  			x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
538
  			if (x >= SFQ_MAX_FLOWS) {
25331d6ce   John Fastabend   net: sched: imple...
539
540
  drop:
  				qdisc_qstats_backlog_dec(sch, skb);
2ccccf5fb   WANG Cong   net_sched: update...
541
  				drop_len += qdisc_pkt_len(skb);
18cb80985   Eric Dumazet   net_sched: sfq: e...
542
543
544
545
  				kfree_skb(skb);
  				dropped++;
  				continue;
  			}
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
546
547
548
549
  			q->ht[hash] = x;
  			slot = &q->slots[x];
  			slot->hash = hash;
  		}
18cb80985   Eric Dumazet   net_sched: sfq: e...
550
551
  		if (slot->qlen >= q->maxdepth)
  			goto drop;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
552
  		slot_queue_add(slot, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
553
554
555
556
557
  		if (q->red_parms)
  			slot->vars.qavg = red_calc_qavg(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		slot->backlog += qdisc_pkt_len(skb);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
558
559
560
561
562
563
564
565
566
567
568
569
  		sfq_inc(q, x);
  		if (slot->qlen == 1) {		/* The flow is new */
  			if (q->tail == NULL) {	/* It is the first flow */
  				slot->next = x;
  			} else {
  				slot->next = q->tail->next;
  				q->tail->next = x;
  			}
  			q->tail = slot;
  			slot->allot = q->scaled_quantum;
  		}
  	}
18cb80985   Eric Dumazet   net_sched: sfq: e...
570
  	sch->q.qlen -= dropped;
2ccccf5fb   WANG Cong   net_sched: update...
571
  	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
572
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
573
574
  static void sfq_perturbation(unsigned long arg)
  {
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
575
  	struct Qdisc *sch = (struct Qdisc *)arg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
576
  	struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
577
  	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
578

225d9b89c   Eric Dumazet   sch_sfq: rehash q...
579
  	spin_lock(root_lock);
63862b5be   Aruna-Hewapathirane   net: replace macr...
580
  	q->perturbation = prandom_u32();
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
581
  	if (!q->filter_list && q->tail)
18cb80985   Eric Dumazet   net_sched: sfq: e...
582
  		sfq_rehash(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
583
  	spin_unlock(root_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
584

32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
585
586
  	if (q->perturb_period)
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
587
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
588
  static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589
590
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
591
  	struct tc_sfq_qopt *ctl = nla_data(opt);
18cb80985   Eric Dumazet   net_sched: sfq: e...
592
  	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
2ccccf5fb   WANG Cong   net_sched: update...
593
  	unsigned int qlen, dropped = 0;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
594
  	struct red_parms *p = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
595

1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
596
  	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
597
  		return -EINVAL;
18cb80985   Eric Dumazet   net_sched: sfq: e...
598
599
  	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
  		ctl_v1 = nla_data(opt);
119b3d386   stephen hemminger   sfq: deadlock in ...
600
601
602
  	if (ctl->divisor &&
  	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
  		return -EINVAL;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
603
604
605
606
607
  	if (ctl_v1 && ctl_v1->qth_min) {
  		p = kmalloc(sizeof(*p), GFP_KERNEL);
  		if (!p)
  			return -ENOMEM;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
608
  	sch_tree_lock(sch);
18cb80985   Eric Dumazet   net_sched: sfq: e...
609
610
611
612
  	if (ctl->quantum) {
  		q->quantum = ctl->quantum;
  		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
613
  	q->perturb_period = ctl->perturb_period * HZ;
18cb80985   Eric Dumazet   net_sched: sfq: e...
614
615
616
  	if (ctl->flows)
  		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
  	if (ctl->divisor) {
817fb15df   Eric Dumazet   net_sched: sfq: a...
617
  		q->divisor = ctl->divisor;
18cb80985   Eric Dumazet   net_sched: sfq: e...
618
619
620
621
622
  		q->maxflows = min_t(u32, q->maxflows, q->divisor);
  	}
  	if (ctl_v1) {
  		if (ctl_v1->depth)
  			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
623
624
625
626
627
628
629
630
631
632
  		if (p) {
  			swap(q->red_parms, p);
  			red_set_parms(q->red_parms,
  				      ctl_v1->qth_min, ctl_v1->qth_max,
  				      ctl_v1->Wlog,
  				      ctl_v1->Plog, ctl_v1->Scell_log,
  				      NULL,
  				      ctl_v1->max_P);
  		}
  		q->flags = ctl_v1->flags;
18cb80985   Eric Dumazet   net_sched: sfq: e...
633
634
635
636
637
638
  		q->headdrop = ctl_v1->headdrop;
  	}
  	if (ctl->limit) {
  		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
  		q->maxflows = min_t(u32, q->maxflows, q->limit);
  	}
5e50da01d   Patrick McHardy   [NET_SCHED]: Fix ...
639
  	qlen = sch->q.qlen;
5588b40d7   Alexey Kuznetsov   [PKT_SCHED]: Fix ...
640
  	while (sch->q.qlen > q->limit)
2ccccf5fb   WANG Cong   net_sched: update...
641
642
  		dropped += sfq_drop(sch);
  	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
643
644
645
  
  	del_timer(&q->perturb_timer);
  	if (q->perturb_period) {
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
646
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
63862b5be   Aruna-Hewapathirane   net: replace macr...
647
  		q->perturbation = prandom_u32();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648
649
  	}
  	sch_tree_unlock(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
650
  	kfree(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
651
652
  	return 0;
  }
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
653
654
655
656
657
658
659
660
661
662
663
  static void *sfq_alloc(size_t sz)
  {
  	void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
  
  	if (!ptr)
  		ptr = vmalloc(sz);
  	return ptr;
  }
  
  static void sfq_free(void *addr)
  {
4cb28970a   WANG Cong   net: use the new ...
664
  	kvfree(addr);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
665
666
667
668
669
670
671
672
673
674
  }
  
  static void sfq_destroy(struct Qdisc *sch)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  
  	tcf_destroy_chain(&q->filter_list);
  	q->perturb_period = 0;
  	del_timer_sync(&q->perturb_timer);
  	sfq_free(q->ht);
18cb80985   Eric Dumazet   net_sched: sfq: e...
675
  	sfq_free(q->slots);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
676
  	kfree(q->red_parms);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
677
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
678
  static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
679
680
681
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	int i;
d3e994830   Stephen Hemminger   [PKT_SCHED] SFQ: ...
682
  	q->perturb_timer.function = sfq_perturbation;
c19a28e11   Fernando Carrijo   remove lots of do...
683
  	q->perturb_timer.data = (unsigned long)sch;
d3e994830   Stephen Hemminger   [PKT_SCHED] SFQ: ...
684
  	init_timer_deferrable(&q->perturb_timer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
685

18cb80985   Eric Dumazet   net_sched: sfq: e...
686
687
688
  	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
  		q->dep[i].next = i + SFQ_MAX_FLOWS;
  		q->dep[i].prev = i + SFQ_MAX_FLOWS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
689
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
690

18cb80985   Eric Dumazet   net_sched: sfq: e...
691
692
  	q->limit = SFQ_MAX_DEPTH;
  	q->maxdepth = SFQ_MAX_DEPTH;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
693
694
  	q->cur_depth = 0;
  	q->tail = NULL;
817fb15df   Eric Dumazet   net_sched: sfq: a...
695
  	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
18cb80985   Eric Dumazet   net_sched: sfq: e...
696
  	q->maxflows = SFQ_DEFAULT_FLOWS;
02a9098ed   Eric Dumazet   net_sched: sfq: a...
697
698
699
  	q->quantum = psched_mtu(qdisc_dev(sch));
  	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	q->perturb_period = 0;
63862b5be   Aruna-Hewapathirane   net: replace macr...
700
  	q->perturbation = prandom_u32();
02a9098ed   Eric Dumazet   net_sched: sfq: a...
701
702
  
  	if (opt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
703
704
705
706
  		int err = sfq_change(sch, opt);
  		if (err)
  			return err;
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
707

bd16a6cce   Eric Dumazet   net_sched: sfq: f...
708
  	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
18cb80985   Eric Dumazet   net_sched: sfq: e...
709
710
  	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
  	if (!q->ht || !q->slots) {
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
711
  		sfq_destroy(sch);
817fb15df   Eric Dumazet   net_sched: sfq: a...
712
  		return -ENOMEM;
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
713
  	}
817fb15df   Eric Dumazet   net_sched: sfq: a...
714
715
  	for (i = 0; i < q->divisor; i++)
  		q->ht[i] = SFQ_EMPTY_SLOT;
18cb80985   Eric Dumazet   net_sched: sfq: e...
716
  	for (i = 0; i < q->maxflows; i++) {
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
717
  		slot_queue_init(&q->slots[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
718
  		sfq_link(q, i);
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
719
  	}
23624935e   Eric Dumazet   net_sched: TCQ_F_...
720
721
722
723
  	if (q->limit >= 1)
  		sch->flags |= TCQ_F_CAN_BYPASS;
  	else
  		sch->flags &= ~TCQ_F_CAN_BYPASS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
724
725
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
726
727
728
  static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
27a884dc3   Arnaldo Carvalho de Melo   [SK_BUFF]: Conver...
729
  	unsigned char *b = skb_tail_pointer(skb);
18cb80985   Eric Dumazet   net_sched: sfq: e...
730
  	struct tc_sfq_qopt_v1 opt;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
731
  	struct red_parms *p = q->red_parms;
18cb80985   Eric Dumazet   net_sched: sfq: e...
732
733
734
735
736
737
738
739
740
  
  	memset(&opt, 0, sizeof(opt));
  	opt.v0.quantum	= q->quantum;
  	opt.v0.perturb_period = q->perturb_period / HZ;
  	opt.v0.limit	= q->limit;
  	opt.v0.divisor	= q->divisor;
  	opt.v0.flows	= q->maxflows;
  	opt.depth	= q->maxdepth;
  	opt.headdrop	= q->headdrop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
741

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
742
743
744
745
746
747
748
749
750
751
  	if (p) {
  		opt.qth_min	= p->qth_min >> p->Wlog;
  		opt.qth_max	= p->qth_max >> p->Wlog;
  		opt.Wlog	= p->Wlog;
  		opt.Plog	= p->Plog;
  		opt.Scell_log	= p->Scell_log;
  		opt.max_P	= p->max_P;
  	}
  	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
  	opt.flags	= q->flags;
1b34ec43c   David S. Miller   pkt_sched: Stop u...
752
753
  	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
  		goto nla_put_failure;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
754
755
  
  	return skb->len;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
756
  nla_put_failure:
dc5fc579b   Arnaldo Carvalho de Melo   [NETLINK]: Use nl...
757
  	nlmsg_trim(skb, b);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
758
759
  	return -1;
  }
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
760
761
762
763
  static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
  {
  	return NULL;
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
764
765
766
767
  static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
  {
  	return 0;
  }
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
768
769
770
  static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
  			      u32 classid)
  {
23624935e   Eric Dumazet   net_sched: TCQ_F_...
771
772
  	/* we cannot bypass queue discipline anymore */
  	sch->flags &= ~TCQ_F_CAN_BYPASS;
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
773
774
  	return 0;
  }
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
775
776
777
  static void sfq_put(struct Qdisc *q, unsigned long cl)
  {
  }
25d8c0d55   John Fastabend   net: rcu-ify tcf_...
778
779
  static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
  					     unsigned long cl)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
780
781
782
783
784
785
786
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  
  	if (cl)
  		return NULL;
  	return &q->filter_list;
  }
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
787
788
789
790
791
792
793
794
795
796
797
  static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
  			  struct sk_buff *skb, struct tcmsg *tcm)
  {
  	tcm->tcm_handle |= TC_H_MIN(cl);
  	return 0;
  }
  
  static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  				struct gnet_dump *d)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
798
799
800
  	sfq_index idx = q->ht[cl - 1];
  	struct gnet_stats_queue qs = { 0 };
  	struct tc_sfq_xstats xstats = { 0 };
c42662632   Eric Dumazet   net_sched: sch_sf...
801

ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
802
803
  	if (idx != SFQ_EMPTY_SLOT) {
  		const struct sfq_slot *slot = &q->slots[idx];
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
804

eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
805
  		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
806
  		qs.qlen = slot->qlen;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
807
  		qs.backlog = slot->backlog;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
808
  	}
b0ab6f927   John Fastabend   net: sched: enabl...
809
  	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
810
811
812
  		return -1;
  	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
813
814
  static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  {
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
815
816
817
818
819
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	unsigned int i;
  
  	if (arg->stop)
  		return;
817fb15df   Eric Dumazet   net_sched: sfq: a...
820
  	for (i = 0; i < q->divisor; i++) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
821
  		if (q->ht[i] == SFQ_EMPTY_SLOT ||
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
822
823
824
825
826
827
828
829
830
831
  		    arg->count < arg->skip) {
  			arg->count++;
  			continue;
  		}
  		if (arg->fn(sch, i + 1, arg) < 0) {
  			arg->stop = 1;
  			break;
  		}
  		arg->count++;
  	}
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
832
833
834
  }
  
  static const struct Qdisc_class_ops sfq_class_ops = {
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
835
  	.leaf		=	sfq_leaf,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
836
  	.get		=	sfq_get,
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
837
  	.put		=	sfq_put,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
838
  	.tcf_chain	=	sfq_find_tcf,
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
839
  	.bind_tcf	=	sfq_bind,
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
840
  	.unbind_tcf	=	sfq_put,
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
841
842
  	.dump		=	sfq_dump_class,
  	.dump_stats	=	sfq_dump_class_stats,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
843
844
  	.walk		=	sfq_walk,
  };
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
845
  static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
846
  	.cl_ops		=	&sfq_class_ops,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
847
848
849
850
  	.id		=	"sfq",
  	.priv_size	=	sizeof(struct sfq_sched_data),
  	.enqueue	=	sfq_enqueue,
  	.dequeue	=	sfq_dequeue,
07bd8df5d   Eric Dumazet   sch_sfq: fix peek...
851
  	.peek		=	qdisc_peek_dequeued,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
852
853
854
855
856
857
858
859
860
861
862
863
  	.init		=	sfq_init,
  	.reset		=	sfq_reset,
  	.destroy	=	sfq_destroy,
  	.change		=	NULL,
  	.dump		=	sfq_dump,
  	.owner		=	THIS_MODULE,
  };
  
  static int __init sfq_module_init(void)
  {
  	return register_qdisc(&sfq_qdisc_ops);
  }
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
864
  static void __exit sfq_module_exit(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
865
866
867
868
869
870
  {
  	unregister_qdisc(&sfq_qdisc_ops);
  }
  module_init(sfq_module_init)
  module_exit(sfq_module_exit)
  MODULE_LICENSE("GPL");