Blame view

net/sched/sch_sfq.c 22.3 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
  /*
   * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
   *
   *		This program is free software; you can redistribute it and/or
   *		modify it under the terms of the GNU General Public License
   *		as published by the Free Software Foundation; either version
   *		2 of the License, or (at your option) any later version.
   *
   * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
11
  #include <linux/module.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
12
13
14
15
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/jiffies.h>
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
16
17
  #include <linux/in.h>
  #include <linux/errno.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
18
  #include <linux/init.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
19
  #include <linux/skbuff.h>
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
20
  #include <linux/jhash.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
21
  #include <linux/slab.h>
817fb15df   Eric Dumazet   net_sched: sfq: a...
22
  #include <linux/vmalloc.h>
0ba480538   Patrick McHardy   [NET_SCHED]: Remo...
23
  #include <net/netlink.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
  #include <net/pkt_sched.h>
11fca931d   Eric Dumazet   sch_sfq: use skb_...
25
  #include <net/flow_keys.h>
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
26
  #include <net/red.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
  
  
  /*	Stochastic Fairness Queuing algorithm.
  	=======================================
  
  	Source:
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
  
  	Paul E. McKenney "Stochastic Fairness Queuing",
  	"Interworking: Research and Experience", v.2, 1991, p.113-131.
  
  
  	See also:
  	M. Shreedhar and George Varghese "Efficient Fair
  	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
43
  	This is not the thing that is usually called (W)FQ nowadays.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
44
45
46
47
48
49
50
51
  	It does not use any timestamp mechanism, but instead
  	processes queues in round-robin order.
  
  	ADVANTAGE:
  
  	- It is very cheap. Both CPU and memory requirements are minimal.
  
  	DRAWBACKS:
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
52
  	- "Stochastic" -> It is not 100% fair.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
53
54
55
56
57
58
59
60
61
62
63
64
65
  	When hash collisions occur, several flows are considered as one.
  
  	- "Round-robin" -> It introduces larger delays than virtual clock
  	based schemes, and should not be used for isolating interactive
  	traffic	from non-interactive. It means, that this scheduler
  	should be used as leaf of CBQ or P3, which put interactive traffic
  	to higher priority band.
  
  	We still need true WFQ for top level CSZ, but using WFQ
  	for the best effort traffic is absolutely pointless:
  	SFQ is superior for this purpose.
  
  	IMPLEMENTATION:
18cb80985   Eric Dumazet   net_sched: sfq: e...
66
67
68
69
70
  	This implementation limits :
  	- maximal queue length per flow to 127 packets.
  	- max mtu to 2^18-1;
  	- max 65408 flows,
  	- number of hash buckets to 65536.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
71
72
  
  	It is easy to increase these values, but not in flight.  */
18cb80985   Eric Dumazet   net_sched: sfq: e...
73
74
75
76
  #define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
  #define SFQ_DEFAULT_FLOWS	128
  #define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
  #define SFQ_EMPTY_SLOT		0xffff
817fb15df   Eric Dumazet   net_sched: sfq: a...
77
  #define SFQ_DEFAULT_HASH_DIVISOR 1024
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
78
79
80
81
82
  /* We use 16 bits to store allot, and want to handle packets up to 64K
   * Scale allot by 8 (1<<3) so that no overflow occurs.
   */
  #define SFQ_ALLOT_SHIFT		3
  #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
83

18cb80985   Eric Dumazet   net_sched: sfq: e...
84
85
  /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
  typedef u16 sfq_index;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
86

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
87
88
  /*
   * We dont use pointers to save space.
18cb80985   Eric Dumazet   net_sched: sfq: e...
89
90
   * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
   * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
91
92
   * are 'pointers' to dep[] array
   */
cc7ec456f   Eric Dumazet   net_sched: cleanups
93
  struct sfq_head {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
94
95
96
  	sfq_index	next;
  	sfq_index	prev;
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
97
98
99
100
  struct sfq_slot {
  	struct sk_buff	*skblist_next;
  	struct sk_buff	*skblist_prev;
  	sfq_index	qlen; /* number of skbs in skblist */
18cb80985   Eric Dumazet   net_sched: sfq: e...
101
  	sfq_index	next; /* next slot in sfq RR chain */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
102
103
104
  	struct sfq_head dep; /* anchor in dep[] chains */
  	unsigned short	hash; /* hash value (index in ht[]) */
  	short		allot; /* credit for this slot */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
105
106
107
  
  	unsigned int    backlog;
  	struct red_vars vars;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
108
  };
cc7ec456f   Eric Dumazet   net_sched: cleanups
109
  struct sfq_sched_data {
18cb80985   Eric Dumazet   net_sched: sfq: e...
110
111
  /* frequently used fields */
  	int		limit;		/* limit of total number of packets in this qdisc */
817fb15df   Eric Dumazet   net_sched: sfq: a...
112
  	unsigned int	divisor;	/* number of slots in hash table */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
113
114
  	u8		headdrop;
  	u8		maxdepth;	/* limit of packets per flow */
18cb80985   Eric Dumazet   net_sched: sfq: e...
115

32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
116
  	u32		perturbation;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
117
118
  	u8		cur_depth;	/* depth of longest slot */
  	u8		flags;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
119
  	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
120
  	struct tcf_proto *filter_list;
18cb80985   Eric Dumazet   net_sched: sfq: e...
121
122
  	sfq_index	*ht;		/* Hash table ('divisor' slots) */
  	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
123
124
125
  	struct red_parms *red_parms;
  	struct tc_sfqred_stats stats;
  	struct sfq_slot *tail;		/* current slot in round */
18cb80985   Eric Dumazet   net_sched: sfq: e...
126
127
128
129
130
131
  	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
  					/* Linked lists of slots, indexed by depth
  					 * dep[0] : list of unused flows
  					 * dep[1] : list of flows with 1 packet
  					 * dep[X] : list of flows with X packets
  					 */
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
132
  	unsigned int	maxflows;	/* number of flows in flows array */
18cb80985   Eric Dumazet   net_sched: sfq: e...
133
134
135
  	int		perturb_period;
  	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
  	struct timer_list perturb_timer;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
136
  };
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
137
138
139
140
141
  /*
   * sfq_head are either in a sfq_slot or in dep[] array
   */
  static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
142
  	if (val < SFQ_MAX_FLOWS)
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
143
  		return &q->slots[val].dep;
18cb80985   Eric Dumazet   net_sched: sfq: e...
144
  	return &q->dep[val - SFQ_MAX_FLOWS];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
145
  }
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
146
147
148
149
150
151
152
153
154
155
156
157
158
159
  /*
   * In order to be able to quickly rehash our queue when timer changes
   * q->perturbation, we store flow_keys in skb->cb[]
   */
  struct sfq_skb_cb {
         struct flow_keys        keys;
  };
  
  static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
  {
         BUILD_BUG_ON(sizeof(skb->cb) <
                 sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
         return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
  }
11fca931d   Eric Dumazet   sch_sfq: use skb_...
160
161
  static unsigned int sfq_hash(const struct sfq_sched_data *q,
  			     const struct sk_buff *skb)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
162
  {
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
163
  	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
11fca931d   Eric Dumazet   sch_sfq: use skb_...
164
  	unsigned int hash;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
165
166
167
  	hash = jhash_3words((__force u32)keys->dst,
  			    (__force u32)keys->src ^ keys->ip_proto,
  			    (__force u32)keys->ports, q->perturbation);
11fca931d   Eric Dumazet   sch_sfq: use skb_...
168
  	return hash & (q->divisor - 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
169
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
170
171
172
173
174
175
176
177
178
  static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
  				 int *qerr)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct tcf_result res;
  	int result;
  
  	if (TC_H_MAJ(skb->priority) == sch->handle &&
  	    TC_H_MIN(skb->priority) > 0 &&
817fb15df   Eric Dumazet   net_sched: sfq: a...
179
  	    TC_H_MIN(skb->priority) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
180
  		return TC_H_MIN(skb->priority);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
181
182
  	if (!q->filter_list) {
  		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
183
  		return sfq_hash(q, skb) + 1;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
184
  	}
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
185

c27f339af   Jarek Poplawski   net_sched: Add qd...
186
  	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
187
188
189
190
191
192
  	result = tc_classify(skb, q->filter_list, &res);
  	if (result >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
  		switch (result) {
  		case TC_ACT_STOLEN:
  		case TC_ACT_QUEUED:
378a2f090   Jarek Poplawski   net_sched: Add qd...
193
  			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
194
195
196
197
  		case TC_ACT_SHOT:
  			return 0;
  		}
  #endif
817fb15df   Eric Dumazet   net_sched: sfq: a...
198
  		if (TC_H_MIN(res.classid) <= q->divisor)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
199
200
201
202
  			return TC_H_MIN(res.classid);
  	}
  	return 0;
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
203
  /*
18cb80985   Eric Dumazet   net_sched: sfq: e...
204
   * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
205
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
207
208
  static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
18cb80985   Eric Dumazet   net_sched: sfq: e...
209
210
  	struct sfq_slot *slot = &q->slots[x];
  	int qlen = slot->qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
211

18cb80985   Eric Dumazet   net_sched: sfq: e...
212
  	p = qlen + SFQ_MAX_FLOWS;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
213
  	n = q->dep[qlen].next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214

18cb80985   Eric Dumazet   net_sched: sfq: e...
215
216
  	slot->dep.next = n;
  	slot->dep.prev = p;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
217
218
219
  
  	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
  	sfq_dep_head(q, n)->prev = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
220
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
221
222
223
224
225
  #define sfq_unlink(q, x, n, p)			\
  	n = q->slots[x].dep.next;		\
  	p = q->slots[x].dep.prev;		\
  	sfq_dep_head(q, p)->next = n;		\
  	sfq_dep_head(q, n)->prev = p
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
226
227
228
  static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
229
  	int d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
230

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
231
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
232

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
233
234
235
  	d = q->slots[x].qlen--;
  	if (n == p && q->cur_depth == d)
  		q->cur_depth--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
241
242
  	sfq_link(q, x);
  }
  
  static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
  {
  	sfq_index p, n;
  	int d;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
243
  	sfq_unlink(q, x, n, p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
244

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
245
246
247
  	d = ++q->slots[x].qlen;
  	if (q->cur_depth < d)
  		q->cur_depth = d;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
248
249
  	sfq_link(q, x);
  }
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
250
251
252
253
254
255
256
257
  /* helper functions : might be changed when/if skb use a standard list_head */
  
  /* remove one skb from tail of slot queue */
  static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_prev;
  
  	slot->skblist_prev = skb->prev;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
258
  	skb->prev->next = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
259
260
261
262
263
264
265
266
267
268
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  /* remove one skb from head of slot queue */
  static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
  {
  	struct sk_buff *skb = slot->skblist_next;
  
  	slot->skblist_next = skb->next;
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
269
  	skb->next->prev = (struct sk_buff *)slot;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
270
271
272
273
274
275
  	skb->next = skb->prev = NULL;
  	return skb;
  }
  
  static inline void slot_queue_init(struct sfq_slot *slot)
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
276
  	memset(slot, 0, sizeof(*slot));
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
  	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
  }
  
  /* add skb to slot queue (tail add) */
  static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
  {
  	skb->prev = slot->skblist_prev;
  	skb->next = (struct sk_buff *)slot;
  	slot->skblist_prev->next = skb;
  	slot->skblist_prev = skb;
  }
  
  #define	slot_queue_walk(slot, skb)		\
  	for (skb = slot->skblist_next;		\
  	     skb != (struct sk_buff *)slot;	\
  	     skb = skb->next)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
293
294
295
  static unsigned int sfq_drop(struct Qdisc *sch)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
296
  	sfq_index x, d = q->cur_depth;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
297
298
  	struct sk_buff *skb;
  	unsigned int len;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
299
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
300

eda83e3b6   Eric Dumazet   net_sched: sch_sf...
301
  	/* Queue is full! Find the longest slot and drop tail packet from it */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
302
  	if (d > 1) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
303
304
305
  		x = q->dep[d].next;
  		slot = &q->slots[x];
  drop:
18cb80985   Eric Dumazet   net_sched: sfq: e...
306
  		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
0abf77e55   Jussi Kivilinna   net_sched: Add ac...
307
  		len = qdisc_pkt_len(skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
308
  		slot->backlog -= len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
309
  		sfq_dec(q, x);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
310
  		kfree_skb(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
311
312
  		sch->q.qlen--;
  		sch->qstats.drops++;
f5539eb8c   Patrick McHardy   [PKT_SCHED]: Keep...
313
  		sch->qstats.backlog -= len;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
314
315
316
317
318
  		return len;
  	}
  
  	if (d == 1) {
  		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
319
320
321
322
323
  		x = q->tail->next;
  		slot = &q->slots[x];
  		q->tail->next = slot->next;
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		goto drop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
324
325
326
327
  	}
  
  	return 0;
  }
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  /* Is ECN parameter configured */
  static int sfq_prob_mark(const struct sfq_sched_data *q)
  {
  	return q->flags & TC_RED_ECN;
  }
  
  /* Should packets over max threshold just be marked */
  static int sfq_hard_mark(const struct sfq_sched_data *q)
  {
  	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
  }
  
  static int sfq_headdrop(const struct sfq_sched_data *q)
  {
  	return q->headdrop;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
344
  static int
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
345
  sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
346
347
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
348
  	unsigned int hash;
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
349
  	sfq_index x, qlen;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
350
  	struct sfq_slot *slot;
7f3ff4f63   Jarek Poplawski   pkt_sched: Annota...
351
  	int uninitialized_var(ret);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
352
353
  	struct sk_buff *head;
  	int delta;
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
354
355
356
  
  	hash = sfq_classify(skb, sch, &ret);
  	if (hash == 0) {
c27f339af   Jarek Poplawski   net_sched: Add qd...
357
  		if (ret & __NET_XMIT_BYPASS)
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
358
359
360
361
362
  			sch->qstats.drops++;
  		kfree_skb(skb);
  		return ret;
  	}
  	hash--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
363
364
  
  	x = q->ht[hash];
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
365
366
367
  	slot = &q->slots[x];
  	if (x == SFQ_EMPTY_SLOT) {
  		x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
368
369
  		if (x >= SFQ_MAX_FLOWS)
  			return qdisc_drop(skb, sch);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
370
371
372
  		q->ht[hash] = x;
  		slot = &q->slots[x];
  		slot->hash = hash;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
  		slot->backlog = 0; /* should already be 0 anyway... */
  		red_set_vars(&slot->vars);
  		goto enqueue;
  	}
  	if (q->red_parms) {
  		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		switch (red_action(q->red_parms,
  				   &slot->vars,
  				   slot->vars.qavg)) {
  		case RED_DONT_MARK:
  			break;
  
  		case RED_PROB_MARK:
  			sch->qstats.overlimits++;
  			if (sfq_prob_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.prob_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.prob_mark++;
  					break;
  				}
  			}
  			q->stats.prob_drop++;
  			goto congestion_drop;
  
  		case RED_HARD_MARK:
  			sch->qstats.overlimits++;
  			if (sfq_hard_mark(q)) {
  				/* We know we have at least one packet in queue */
  				if (sfq_headdrop(q) &&
  				    INET_ECN_set_ce(slot->skblist_next)) {
  					q->stats.forced_mark_head++;
  					break;
  				}
  				if (INET_ECN_set_ce(skb)) {
  					q->stats.forced_mark++;
  					break;
  				}
  			}
  			q->stats.forced_drop++;
  			goto congestion_drop;
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
422

18cb80985   Eric Dumazet   net_sched: sfq: e...
423
  	if (slot->qlen >= q->maxdepth) {
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
424
425
  congestion_drop:
  		if (!sfq_headdrop(q))
18cb80985   Eric Dumazet   net_sched: sfq: e...
426
  			return qdisc_drop(skb, sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
427
  		/* We know we have at least one packet in queue */
18cb80985   Eric Dumazet   net_sched: sfq: e...
428
  		head = slot_dequeue_head(slot);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
429
430
431
  		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
  		sch->qstats.backlog -= delta;
  		slot->backlog -= delta;
18cb80985   Eric Dumazet   net_sched: sfq: e...
432
  		qdisc_drop(head, sch);
18cb80985   Eric Dumazet   net_sched: sfq: e...
433
434
435
  		slot_queue_add(slot, skb);
  		return NET_XMIT_CN;
  	}
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
436

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
437
  enqueue:
0abf77e55   Jussi Kivilinna   net_sched: Add ac...
438
  	sch->qstats.backlog += qdisc_pkt_len(skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
439
  	slot->backlog += qdisc_pkt_len(skb);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
440
  	slot_queue_add(slot, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
441
  	sfq_inc(q, x);
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
442
443
444
  	if (slot->qlen == 1) {		/* The flow is new */
  		if (q->tail == NULL) {	/* It is the first flow */
  			slot->next = x;
d47a0ac7b   Eric Dumazet   sch_sfq: dont put...
445
  			q->tail = slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
446
  		} else {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
447
448
  			slot->next = q->tail->next;
  			q->tail->next = x;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
  		}
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
450
  		/* We could use a bigger initial quantum for new flows */
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
451
  		slot->allot = q->scaled_quantum;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452
  	}
9190b3b32   Eric Dumazet   net_sched: accura...
453
  	if (++sch->q.qlen <= q->limit)
9871e50ed   Ben Greear   net: Use NET_XMIT...
454
  		return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
455

8efa88540   Eric Dumazet   sch_sfq: avoid gi...
456
  	qlen = slot->qlen;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457
  	sfq_drop(sch);
8efa88540   Eric Dumazet   sch_sfq: avoid gi...
458
459
460
  	/* Return Congestion Notification only if we dropped a packet
  	 * from this flow.
  	 */
e1738bd9c   Eric Dumazet   sch_sfq: fix sfq_...
461
462
463
464
465
466
  	if (qlen != slot->qlen)
  		return NET_XMIT_CN;
  
  	/* As we dropped a packet, better let upper stack know this */
  	qdisc_tree_decrease_qlen(sch, 1);
  	return NET_XMIT_SUCCESS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
467
  }
48a8f519e   Patrick McHardy   pkt_sched: Add ->...
468
  static struct sk_buff *
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
469
  sfq_dequeue(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
470
471
472
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	struct sk_buff *skb;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
473
  	sfq_index a, next_a;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
474
  	struct sfq_slot *slot;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
475
476
  
  	/* No active slots */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
477
  	if (q->tail == NULL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
478
  		return NULL;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
479
  next_slot:
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
480
481
  	a = q->tail->next;
  	slot = &q->slots[a];
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
482
483
484
485
486
  	if (slot->allot <= 0) {
  		q->tail = slot;
  		slot->allot += q->scaled_quantum;
  		goto next_slot;
  	}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
487
  	skb = slot_dequeue_head(slot);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
488
  	sfq_dec(q, a);
9190b3b32   Eric Dumazet   net_sched: accura...
489
  	qdisc_bstats_update(sch, skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
490
  	sch->q.qlen--;
0abf77e55   Jussi Kivilinna   net_sched: Add ac...
491
  	sch->qstats.backlog -= qdisc_pkt_len(skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
492
  	slot->backlog -= qdisc_pkt_len(skb);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
493
  	/* Is the slot empty? */
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
494
495
496
  	if (slot->qlen == 0) {
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  		next_a = slot->next;
aa3e21999   Eric Dumazet   net_sched: sch_sf...
497
  		if (a == next_a) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
498
  			q->tail = NULL; /* no more active slots */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
499
500
  			return skb;
  		}
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
501
  		q->tail->next = next_a;
eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
502
503
  	} else {
  		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
504
505
506
507
508
  	}
  	return skb;
  }
  
  static void
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
509
  sfq_reset(struct Qdisc *sch)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
510
511
512
513
514
515
  {
  	struct sk_buff *skb;
  
  	while ((skb = sfq_dequeue(sch)) != NULL)
  		kfree_skb(skb);
  }
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
516
517
518
519
520
521
  /*
   * When q->perturbation is changed, we rehash all queued skbs
   * to avoid OOO (Out Of Order) effects.
   * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
   * counters.
   */
18cb80985   Eric Dumazet   net_sched: sfq: e...
522
  static void sfq_rehash(struct Qdisc *sch)
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
523
  {
18cb80985   Eric Dumazet   net_sched: sfq: e...
524
  	struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
525
526
527
528
  	struct sk_buff *skb;
  	int i;
  	struct sfq_slot *slot;
  	struct sk_buff_head list;
18cb80985   Eric Dumazet   net_sched: sfq: e...
529
  	int dropped = 0;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
530
531
  
  	__skb_queue_head_init(&list);
18cb80985   Eric Dumazet   net_sched: sfq: e...
532
  	for (i = 0; i < q->maxflows; i++) {
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
533
534
535
536
537
538
539
540
  		slot = &q->slots[i];
  		if (!slot->qlen)
  			continue;
  		while (slot->qlen) {
  			skb = slot_dequeue_head(slot);
  			sfq_dec(q, i);
  			__skb_queue_tail(&list, skb);
  		}
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
541
542
  		slot->backlog = 0;
  		red_set_vars(&slot->vars);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
543
544
545
546
547
548
549
550
551
552
553
  		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
  	}
  	q->tail = NULL;
  
  	while ((skb = __skb_dequeue(&list)) != NULL) {
  		unsigned int hash = sfq_hash(q, skb);
  		sfq_index x = q->ht[hash];
  
  		slot = &q->slots[x];
  		if (x == SFQ_EMPTY_SLOT) {
  			x = q->dep[0].next; /* get a free slot */
18cb80985   Eric Dumazet   net_sched: sfq: e...
554
555
556
557
558
559
  			if (x >= SFQ_MAX_FLOWS) {
  drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
  				kfree_skb(skb);
  				dropped++;
  				continue;
  			}
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
560
561
562
563
  			q->ht[hash] = x;
  			slot = &q->slots[x];
  			slot->hash = hash;
  		}
18cb80985   Eric Dumazet   net_sched: sfq: e...
564
565
  		if (slot->qlen >= q->maxdepth)
  			goto drop;
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
566
  		slot_queue_add(slot, skb);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
567
568
569
570
571
  		if (q->red_parms)
  			slot->vars.qavg = red_calc_qavg(q->red_parms,
  							&slot->vars,
  							slot->backlog);
  		slot->backlog += qdisc_pkt_len(skb);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
572
573
574
575
576
577
578
579
580
581
582
583
  		sfq_inc(q, x);
  		if (slot->qlen == 1) {		/* The flow is new */
  			if (q->tail == NULL) {	/* It is the first flow */
  				slot->next = x;
  			} else {
  				slot->next = q->tail->next;
  				q->tail->next = x;
  			}
  			q->tail = slot;
  			slot->allot = q->scaled_quantum;
  		}
  	}
18cb80985   Eric Dumazet   net_sched: sfq: e...
584
585
  	sch->q.qlen -= dropped;
  	qdisc_tree_decrease_qlen(sch, dropped);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
586
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
587
588
  static void sfq_perturbation(unsigned long arg)
  {
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
589
  	struct Qdisc *sch = (struct Qdisc *)arg;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
590
  	struct sfq_sched_data *q = qdisc_priv(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
591
  	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
592

225d9b89c   Eric Dumazet   sch_sfq: rehash q...
593
  	spin_lock(root_lock);
d46f8dd87   Stephen Hemminger   [PKT_SCHED] SFQ: ...
594
  	q->perturbation = net_random();
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
595
  	if (!q->filter_list && q->tail)
18cb80985   Eric Dumazet   net_sched: sfq: e...
596
  		sfq_rehash(sch);
225d9b89c   Eric Dumazet   sch_sfq: rehash q...
597
  	spin_unlock(root_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
598

32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
599
600
  	if (q->perturb_period)
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
601
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
602
  static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
603
604
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
605
  	struct tc_sfq_qopt *ctl = nla_data(opt);
18cb80985   Eric Dumazet   net_sched: sfq: e...
606
  	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
5e50da01d   Patrick McHardy   [NET_SCHED]: Fix ...
607
  	unsigned int qlen;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
608
  	struct red_parms *p = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
609

1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
610
  	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
611
  		return -EINVAL;
18cb80985   Eric Dumazet   net_sched: sfq: e...
612
613
  	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
  		ctl_v1 = nla_data(opt);
119b3d386   stephen hemminger   sfq: deadlock in ...
614
615
616
  	if (ctl->divisor &&
  	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
  		return -EINVAL;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
617
618
619
620
621
  	if (ctl_v1 && ctl_v1->qth_min) {
  		p = kmalloc(sizeof(*p), GFP_KERNEL);
  		if (!p)
  			return -ENOMEM;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
622
  	sch_tree_lock(sch);
18cb80985   Eric Dumazet   net_sched: sfq: e...
623
624
625
626
  	if (ctl->quantum) {
  		q->quantum = ctl->quantum;
  		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
627
  	q->perturb_period = ctl->perturb_period * HZ;
18cb80985   Eric Dumazet   net_sched: sfq: e...
628
629
630
  	if (ctl->flows)
  		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
  	if (ctl->divisor) {
817fb15df   Eric Dumazet   net_sched: sfq: a...
631
  		q->divisor = ctl->divisor;
18cb80985   Eric Dumazet   net_sched: sfq: e...
632
633
634
635
636
  		q->maxflows = min_t(u32, q->maxflows, q->divisor);
  	}
  	if (ctl_v1) {
  		if (ctl_v1->depth)
  			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
637
638
639
640
641
642
643
644
645
646
  		if (p) {
  			swap(q->red_parms, p);
  			red_set_parms(q->red_parms,
  				      ctl_v1->qth_min, ctl_v1->qth_max,
  				      ctl_v1->Wlog,
  				      ctl_v1->Plog, ctl_v1->Scell_log,
  				      NULL,
  				      ctl_v1->max_P);
  		}
  		q->flags = ctl_v1->flags;
18cb80985   Eric Dumazet   net_sched: sfq: e...
647
648
649
650
651
652
  		q->headdrop = ctl_v1->headdrop;
  	}
  	if (ctl->limit) {
  		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
  		q->maxflows = min_t(u32, q->maxflows, q->limit);
  	}
5e50da01d   Patrick McHardy   [NET_SCHED]: Fix ...
653
  	qlen = sch->q.qlen;
5588b40d7   Alexey Kuznetsov   [PKT_SCHED]: Fix ...
654
  	while (sch->q.qlen > q->limit)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
655
  		sfq_drop(sch);
5e50da01d   Patrick McHardy   [NET_SCHED]: Fix ...
656
  	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
657
658
659
  
  	del_timer(&q->perturb_timer);
  	if (q->perturb_period) {
32740ddc1   Alexey Kuznetsov   [SFQ]: Remove art...
660
  		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
d46f8dd87   Stephen Hemminger   [PKT_SCHED] SFQ: ...
661
  		q->perturbation = net_random();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
662
663
  	}
  	sch_tree_unlock(sch);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
664
  	kfree(p);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
665
666
  	return 0;
  }
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
  static void *sfq_alloc(size_t sz)
  {
  	void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
  
  	if (!ptr)
  		ptr = vmalloc(sz);
  	return ptr;
  }
  
  static void sfq_free(void *addr)
  {
  	if (addr) {
  		if (is_vmalloc_addr(addr))
  			vfree(addr);
  		else
  			kfree(addr);
  	}
  }
  
  static void sfq_destroy(struct Qdisc *sch)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  
  	tcf_destroy_chain(&q->filter_list);
  	q->perturb_period = 0;
  	del_timer_sync(&q->perturb_timer);
  	sfq_free(q->ht);
18cb80985   Eric Dumazet   net_sched: sfq: e...
694
  	sfq_free(q->slots);
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
695
  	kfree(q->red_parms);
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
696
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
697
  static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
698
699
700
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	int i;
d3e994830   Stephen Hemminger   [PKT_SCHED] SFQ: ...
701
  	q->perturb_timer.function = sfq_perturbation;
c19a28e11   Fernando Carrijo   remove lots of do...
702
  	q->perturb_timer.data = (unsigned long)sch;
d3e994830   Stephen Hemminger   [PKT_SCHED] SFQ: ...
703
  	init_timer_deferrable(&q->perturb_timer);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
704

18cb80985   Eric Dumazet   net_sched: sfq: e...
705
706
707
  	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
  		q->dep[i].next = i + SFQ_MAX_FLOWS;
  		q->dep[i].prev = i + SFQ_MAX_FLOWS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
708
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
709

18cb80985   Eric Dumazet   net_sched: sfq: e...
710
711
  	q->limit = SFQ_MAX_DEPTH;
  	q->maxdepth = SFQ_MAX_DEPTH;
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
712
713
  	q->cur_depth = 0;
  	q->tail = NULL;
817fb15df   Eric Dumazet   net_sched: sfq: a...
714
  	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
18cb80985   Eric Dumazet   net_sched: sfq: e...
715
  	q->maxflows = SFQ_DEFAULT_FLOWS;
02a9098ed   Eric Dumazet   net_sched: sfq: a...
716
717
718
719
720
721
  	q->quantum = psched_mtu(qdisc_dev(sch));
  	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
  	q->perturb_period = 0;
  	q->perturbation = net_random();
  
  	if (opt) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
722
723
724
725
  		int err = sfq_change(sch, opt);
  		if (err)
  			return err;
  	}
6f9e98f7a   Stephen Hemminger   [PKT_SCHED] SFQ: ...
726

bd16a6cce   Eric Dumazet   net_sched: sfq: f...
727
  	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
18cb80985   Eric Dumazet   net_sched: sfq: e...
728
729
  	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
  	if (!q->ht || !q->slots) {
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
730
  		sfq_destroy(sch);
817fb15df   Eric Dumazet   net_sched: sfq: a...
731
  		return -ENOMEM;
bd16a6cce   Eric Dumazet   net_sched: sfq: f...
732
  	}
817fb15df   Eric Dumazet   net_sched: sfq: a...
733
734
  	for (i = 0; i < q->divisor; i++)
  		q->ht[i] = SFQ_EMPTY_SLOT;
18cb80985   Eric Dumazet   net_sched: sfq: e...
735
  	for (i = 0; i < q->maxflows; i++) {
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
736
  		slot_queue_init(&q->slots[i]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
737
  		sfq_link(q, i);
18c8d82ae   Eric Dumazet   sfq: fix slot_deq...
738
  	}
23624935e   Eric Dumazet   net_sched: TCQ_F_...
739
740
741
742
  	if (q->limit >= 1)
  		sch->flags |= TCQ_F_CAN_BYPASS;
  	else
  		sch->flags &= ~TCQ_F_CAN_BYPASS;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
743
744
  	return 0;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
745
746
747
  static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
27a884dc3   Arnaldo Carvalho de Melo   [SK_BUFF]: Conver...
748
  	unsigned char *b = skb_tail_pointer(skb);
18cb80985   Eric Dumazet   net_sched: sfq: e...
749
  	struct tc_sfq_qopt_v1 opt;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
750
  	struct red_parms *p = q->red_parms;
18cb80985   Eric Dumazet   net_sched: sfq: e...
751
752
753
754
755
756
757
758
759
  
  	memset(&opt, 0, sizeof(opt));
  	opt.v0.quantum	= q->quantum;
  	opt.v0.perturb_period = q->perturb_period / HZ;
  	opt.v0.limit	= q->limit;
  	opt.v0.divisor	= q->divisor;
  	opt.v0.flows	= q->maxflows;
  	opt.depth	= q->maxdepth;
  	opt.headdrop	= q->headdrop;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
760

ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
761
762
763
764
765
766
767
768
769
770
  	if (p) {
  		opt.qth_min	= p->qth_min >> p->Wlog;
  		opt.qth_max	= p->qth_max >> p->Wlog;
  		opt.Wlog	= p->Wlog;
  		opt.Plog	= p->Plog;
  		opt.Scell_log	= p->Scell_log;
  		opt.max_P	= p->max_P;
  	}
  	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
  	opt.flags	= q->flags;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
771
  	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
772
773
  
  	return skb->len;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
774
  nla_put_failure:
dc5fc579b   Arnaldo Carvalho de Melo   [NETLINK]: Use nl...
775
  	nlmsg_trim(skb, b);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
776
777
  	return -1;
  }
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
778
779
780
781
  static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
  {
  	return NULL;
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
782
783
784
785
  static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
  {
  	return 0;
  }
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
786
787
788
  static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
  			      u32 classid)
  {
23624935e   Eric Dumazet   net_sched: TCQ_F_...
789
790
  	/* we cannot bypass queue discipline anymore */
  	sch->flags &= ~TCQ_F_CAN_BYPASS;
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
791
792
  	return 0;
  }
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
793
794
795
  static void sfq_put(struct Qdisc *q, unsigned long cl)
  {
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
796
797
798
799
800
801
802
803
  static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
  
  	if (cl)
  		return NULL;
  	return &q->filter_list;
  }
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
804
805
806
807
808
809
810
811
812
813
814
  static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
  			  struct sk_buff *skb, struct tcmsg *tcm)
  {
  	tcm->tcm_handle |= TC_H_MIN(cl);
  	return 0;
  }
  
  static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  				struct gnet_dump *d)
  {
  	struct sfq_sched_data *q = qdisc_priv(sch);
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
815
816
817
  	sfq_index idx = q->ht[cl - 1];
  	struct gnet_stats_queue qs = { 0 };
  	struct tc_sfq_xstats xstats = { 0 };
c42662632   Eric Dumazet   net_sched: sch_sf...
818

ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
819
820
  	if (idx != SFQ_EMPTY_SLOT) {
  		const struct sfq_slot *slot = &q->slots[idx];
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
821

eeaeb068f   Eric Dumazet   sch_sfq: allow bi...
822
  		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
823
  		qs.qlen = slot->qlen;
ddecf0f4d   Eric Dumazet   net_sched: sfq: a...
824
  		qs.backlog = slot->backlog;
ee09b3c1c   Eric Dumazet   sfq: fix sfq clas...
825
  	}
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
826
827
828
829
  	if (gnet_stats_copy_queue(d, &qs) < 0)
  		return -1;
  	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
  }
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
830
831
  static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  {
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
832
833
834
835
836
  	struct sfq_sched_data *q = qdisc_priv(sch);
  	unsigned int i;
  
  	if (arg->stop)
  		return;
817fb15df   Eric Dumazet   net_sched: sfq: a...
837
  	for (i = 0; i < q->divisor; i++) {
eda83e3b6   Eric Dumazet   net_sched: sch_sf...
838
  		if (q->ht[i] == SFQ_EMPTY_SLOT ||
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
839
840
841
842
843
844
845
846
847
848
  		    arg->count < arg->skip) {
  			arg->count++;
  			continue;
  		}
  		if (arg->fn(sch, i + 1, arg) < 0) {
  			arg->stop = 1;
  			break;
  		}
  		arg->count++;
  	}
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
849
850
851
  }
  
  static const struct Qdisc_class_ops sfq_class_ops = {
41065fba8   Jarek Poplawski   pkt_sched: Fix sc...
852
  	.leaf		=	sfq_leaf,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
853
  	.get		=	sfq_get,
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
854
  	.put		=	sfq_put,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
855
  	.tcf_chain	=	sfq_find_tcf,
eb4a5527b   Jarek Poplawski   pkt_sched: Fix sc...
856
  	.bind_tcf	=	sfq_bind,
da7115d94   Jarek Poplawski   pkt_sched: sch_sf...
857
  	.unbind_tcf	=	sfq_put,
94de78d19   Patrick McHardy   [NET_SCHED]: sch_...
858
859
  	.dump		=	sfq_dump_class,
  	.dump_stats	=	sfq_dump_class_stats,
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
860
861
  	.walk		=	sfq_walk,
  };
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
862
  static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
7d2681a6f   Patrick McHardy   [NET_SCHED]: sch_...
863
  	.cl_ops		=	&sfq_class_ops,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
864
865
866
867
  	.id		=	"sfq",
  	.priv_size	=	sizeof(struct sfq_sched_data),
  	.enqueue	=	sfq_enqueue,
  	.dequeue	=	sfq_dequeue,
07bd8df5d   Eric Dumazet   sch_sfq: fix peek...
868
  	.peek		=	qdisc_peek_dequeued,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
869
870
871
872
873
874
875
876
877
878
879
880
881
  	.drop		=	sfq_drop,
  	.init		=	sfq_init,
  	.reset		=	sfq_reset,
  	.destroy	=	sfq_destroy,
  	.change		=	NULL,
  	.dump		=	sfq_dump,
  	.owner		=	THIS_MODULE,
  };
  
  static int __init sfq_module_init(void)
  {
  	return register_qdisc(&sfq_qdisc_ops);
  }
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
882
  static void __exit sfq_module_exit(void)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
883
884
885
886
887
888
  {
  	unregister_qdisc(&sfq_qdisc_ops);
  }
  module_init(sfq_module_init)
  module_exit(sfq_module_exit)
  MODULE_LICENSE("GPL");