Blame view

net/sched/sch_api.c 44.5 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
  /*
   * net/sched/sch_api.c	Packet scheduler API.
   *
   *		This program is free software; you can redistribute it and/or
   *		modify it under the terms of the GNU General Public License
   *		as published by the Free Software Foundation; either version
   *		2 of the License, or (at your option) any later version.
   *
   * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   *
   * Fixes:
   *
   * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
   * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
   * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
17
18
19
  #include <linux/module.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
20
  #include <linux/string.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
21
  #include <linux/errno.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
22
  #include <linux/skbuff.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
23
24
25
26
27
  #include <linux/init.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <linux/kmod.h>
  #include <linux/list.h>
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
28
  #include <linux/hrtimer.h>
25bfcd5a7   Jarek Poplawski   pkt_sched: Add lo...
29
  #include <linux/lockdep.h>
5a0e3ad6a   Tejun Heo   include cleanup: ...
30
  #include <linux/slab.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
31

457c4cbc5   Eric W. Biederman   [NET]: Make /proc...
32
  #include <net/net_namespace.h>
b854272b3   Denis V. Lunev   [NET]: Modify all...
33
  #include <net/sock.h>
dc5fc579b   Arnaldo Carvalho de Melo   [NETLINK]: Use nl...
34
  #include <net/netlink.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
35
  #include <net/pkt_sched.h>
7316ae88c   Tom Goff   net_sched: make t...
36
37
  static int qdisc_notify(struct net *net, struct sk_buff *oskb,
  			struct nlmsghdr *n, u32 clid,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
38
  			struct Qdisc *old, struct Qdisc *new);
7316ae88c   Tom Goff   net_sched: make t...
39
40
41
  static int tclass_notify(struct net *net, struct sk_buff *oskb,
  			 struct nlmsghdr *n, struct Qdisc *q,
  			 unsigned long cl, int event);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  
  /*
  
     Short review.
     -------------
  
     This file consists of two interrelated parts:
  
     1. queueing disciplines manager frontend.
     2. traffic classes manager frontend.
  
     Generally, queueing discipline ("qdisc") is a black box,
     which is able to enqueue packets and to dequeue them (when
     device is ready to send something) in order and at times
     determined by algorithm hidden in it.
  
     qdiscs are divided into two categories:
     - "queues", which have no internal structure visible from outside.
     - "schedulers", which split all the packets into "traffic classes",
       using "packet classifiers" (look at cls_api.c)
  
     In turn, classes may have child qdiscs (as a rule, queues)
     attached to them etc. etc. etc.
  
     The goal of the routines in this file is to translate
     information supplied by the user in the form of handles
     into a form more intelligible to the kernel, to make some sanity
     checks and do the part of the work which is common to all qdiscs,
     and to provide rtnetlink notifications.
  
     All real intelligent work is done inside qdisc modules.
  
  
  
     Every discipline has two major routines: enqueue and dequeue.
  
     ---dequeue
  
     dequeue usually returns a skb to send. It is allowed to return NULL,
     but that does not mean that the queue is empty, it just means that
     the discipline does not want to send anything this time.
     The queue is really empty only if q->q.qlen == 0.
     For complicated disciplines with multiple queues q->q is not
     a real packet queue, but q->q.qlen must still be valid.
  
     ---enqueue
  
     enqueue returns 0 if the packet was enqueued successfully.
     If a packet (this one or another one) was dropped, it returns
     a non-zero error code.
     NET_XMIT_DROP 	- this packet dropped
       Expected action: do not backoff, but wait until queue will clear.
     NET_XMIT_CN	 	- probably this packet enqueued, but another one dropped.
       Expected action: backoff or ignore
     NET_XMIT_POLICED	- dropped by police.
       Expected action: backoff or error to real-time apps.
  
     Auxiliary routines:
99c0db267   Jarek Poplawski   pkt_sched: sch_ge...
100
101
102
     ---peek
  
     like dequeue but without removing a packet from the queue
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
     ---reset
  
     returns qdisc to initial state: purge all buffers, clear all
     timers, counters (except for statistics) etc.
  
     ---init
  
     initializes newly created qdisc.
  
     ---destroy
  
     destroys resources allocated by init and during lifetime of qdisc.
  
     ---change
  
     changes qdisc parameters.
   */
  
  /* Protects list of registered TC modules. It is pure SMP lock. */
  static DEFINE_RWLOCK(qdisc_mod_lock);
  
  
  /************************************************
   *	Queueing disciplines manipulation.	*
   ************************************************/
  
  
  /* The list of all installed queueing disciplines. */
  
  static struct Qdisc_ops *qdisc_base;
21eb21898   Zhi Yong Wu   net, sch: fix the...
133
  /* Register/unregister queueing discipline */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
134
135
136
137
138
139
140
141
142
143
144
145
146
  
  int register_qdisc(struct Qdisc_ops *qops)
  {
  	struct Qdisc_ops *q, **qp;
  	int rc = -EEXIST;
  
  	write_lock(&qdisc_mod_lock);
  	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
  		if (!strcmp(qops->id, q->id))
  			goto out;
  
  	if (qops->enqueue == NULL)
  		qops->enqueue = noop_qdisc_ops.enqueue;
99c0db267   Jarek Poplawski   pkt_sched: sch_ge...
147
  	if (qops->peek == NULL) {
68fd26b59   Jarek Poplawski   pkt_sched: Add so...
148
  		if (qops->dequeue == NULL)
99c0db267   Jarek Poplawski   pkt_sched: sch_ge...
149
  			qops->peek = noop_qdisc_ops.peek;
68fd26b59   Jarek Poplawski   pkt_sched: Add so...
150
151
  		else
  			goto out_einval;
99c0db267   Jarek Poplawski   pkt_sched: sch_ge...
152
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
153
154
  	if (qops->dequeue == NULL)
  		qops->dequeue = noop_qdisc_ops.dequeue;
68fd26b59   Jarek Poplawski   pkt_sched: Add so...
155
156
  	if (qops->cl_ops) {
  		const struct Qdisc_class_ops *cops = qops->cl_ops;
3e9e5a592   Jarek Poplawski   pkt_sched: Check ...
157
  		if (!(cops->get && cops->put && cops->walk && cops->leaf))
68fd26b59   Jarek Poplawski   pkt_sched: Add so...
158
159
160
161
162
  			goto out_einval;
  
  		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
  			goto out_einval;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163
164
165
166
167
168
  	qops->next = NULL;
  	*qp = qops;
  	rc = 0;
  out:
  	write_unlock(&qdisc_mod_lock);
  	return rc;
68fd26b59   Jarek Poplawski   pkt_sched: Add so...
169
170
171
172
  
  out_einval:
  	rc = -EINVAL;
  	goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
173
  }
62e3ba1b5   Patrick McHardy   [NET_SCHED]: Move...
174
  EXPORT_SYMBOL(register_qdisc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
175
176
177
178
179
180
181
  
  int unregister_qdisc(struct Qdisc_ops *qops)
  {
  	struct Qdisc_ops *q, **qp;
  	int err = -ENOENT;
  
  	write_lock(&qdisc_mod_lock);
cc7ec456f   Eric Dumazet   net_sched: cleanups
182
  	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
184
185
186
187
188
189
190
191
192
  		if (q == qops)
  			break;
  	if (q) {
  		*qp = q->next;
  		q->next = NULL;
  		err = 0;
  	}
  	write_unlock(&qdisc_mod_lock);
  	return err;
  }
62e3ba1b5   Patrick McHardy   [NET_SCHED]: Move...
193
  EXPORT_SYMBOL(unregister_qdisc);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
194

6da7c8fcb   stephen hemminger   qdisc: allow sett...
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
  /* Get default qdisc if not otherwise specified.
   *
   * Copies the id of default_qdisc_ops into @name, a buffer of @len bytes,
   * using strlcpy() while qdisc_mod_lock is held for reading.
   */
  void qdisc_get_default(char *name, size_t len)
  {
  	read_lock(&qdisc_mod_lock);
  	strlcpy(name, default_qdisc_ops->id, len);
  	read_unlock(&qdisc_mod_lock);
  }
  
  /*
   * Find a registered discipline whose id equals @name and take a reference
   * on its owning module.
   *
   * Returns the matching ops, or NULL when no entry matches or when
   * try_module_get() fails for the entry that matched.  This helper does no
   * locking of its own; the caller in this file holds qdisc_mod_lock.
   */
  static struct Qdisc_ops *qdisc_lookup_default(const char *name)
  {
  	struct Qdisc_ops *ops;
  
  	for (ops = qdisc_base; ops != NULL; ops = ops->next) {
  		if (strcmp(name, ops->id) != 0)
  			continue;
  
  		/* Only the first entry with this id is considered. */
  		return try_module_get(ops->owner) ? ops : NULL;
  	}
  
  	return NULL;
  }
  
  /* Set new default qdisc to use */
  int qdisc_set_default(const char *name)
  {
  	const struct Qdisc_ops *ops;
  
  	if (!capable(CAP_NET_ADMIN))
  		return -EPERM;
  
  	write_lock(&qdisc_mod_lock);
  	ops = qdisc_lookup_default(name);
  	if (!ops) {
  		/* Not found, drop lock and try to load module */
  		write_unlock(&qdisc_mod_lock);
  		request_module("sch_%s", name);
  		write_lock(&qdisc_mod_lock);
  
  		ops = qdisc_lookup_default(name);
  	}
  
  	if (ops) {
  		/* Set new default */
  		module_put(default_qdisc_ops->owner);
  		default_qdisc_ops = ops;
  	}
  	write_unlock(&qdisc_mod_lock);
  
  	return ops ? 0 : -ENOENT;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
246
247
248
  /* We know handle. Find qdisc among all qdisc's attached to device
     (root qdisc, all its children, children of children etc.)
   */
6113b748f   Hannes Eder   pkt_sched: fix sp...
249
  /*
   * Look for @handle on @root itself and then on every qdisc linked on
   * root->list.  A root flagged TCQ_F_BUILTIN is never returned as a match
   * for itself.  Returns the matching qdisc or NULL.
   */
  static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
  {
  	struct Qdisc *q;
  
  	if (!(root->flags & TCQ_F_BUILTIN) &&
  	    root->handle == handle)
  		return root;
  
  	list_for_each_entry(q, &root->list, list) {
  		if (q->handle == handle)
  			return q;
  	}
  	return NULL;
  }
95dc19299   Eric Dumazet   pkt_sched: give v...
263
  void qdisc_list_add(struct Qdisc *q)
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
264
  {
37314363c   Eric Dumazet   pkt_sched: move t...
265
266
  	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
  		struct Qdisc *root = qdisc_dev(q)->qdisc;
e57a784d8   Eric Dumazet   pkt_sched: set ro...
267

37314363c   Eric Dumazet   pkt_sched: move t...
268
  		WARN_ON_ONCE(root == &noop_qdisc);
e57a784d8   Eric Dumazet   pkt_sched: set ro...
269
  		list_add_tail(&q->list, &root->list);
37314363c   Eric Dumazet   pkt_sched: move t...
270
  	}
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
271
  }
95dc19299   Eric Dumazet   pkt_sched: give v...
272
  EXPORT_SYMBOL(qdisc_list_add);
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
273
274
275
  
  void qdisc_list_del(struct Qdisc *q)
  {
f6486d40b   Jarek Poplawski   pkt_sched: sch_ap...
276
  	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
277
  		list_del(&q->list);
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
278
279
  }
  EXPORT_SYMBOL(qdisc_list_del);
ead81cc5f   David S. Miller   netdevice: Move q...
280
  /*
   * qdisc_lookup - find a qdisc attached to @dev by @handle
   *
   * Searches the tree under dev->qdisc first; if nothing matches and the
   * device has an ingress queue, searches under that queue's
   * qdisc_sleeping.  Returns the qdisc or NULL.
   */
  struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
  {
  	struct Qdisc *q;
  
  	q = qdisc_match_from_root(dev->qdisc, handle);
  	if (q)
  		goto out;
  
  	if (dev_ingress_queue(dev))
  		q = qdisc_match_from_root(
  			dev_ingress_queue(dev)->qdisc_sleeping,
  			handle);
  out:
  	return q;
  }
  
  /*
   * Return the qdisc reported by the class operations' leaf() callback for
   * class @classid of @p.  Returns NULL if @p has no class operations or if
   * get() returns 0 for @classid.  The class obtained with get() is
   * released with put() before returning.
   */
  static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
  {
  	unsigned long cl;
  	struct Qdisc *leaf;
  	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
  
  	if (cops == NULL)
  		return NULL;
  	cl = cops->get(p, classid);
  
  	if (cl == 0)
  		return NULL;
  	leaf = cops->leaf(p, cl);
  	cops->put(p, cl);
  	return leaf;
  }
  
  /* Find queueing discipline by name */
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
313
  static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
314
315
316
317
318
319
  {
  	struct Qdisc_ops *q = NULL;
  
  	if (kind) {
  		read_lock(&qdisc_mod_lock);
  		for (q = qdisc_base; q; q = q->next) {
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
320
  			if (nla_strcmp(kind, q->id) == 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
322
323
324
325
326
327
328
329
  				if (!try_module_get(q->owner))
  					q = NULL;
  				break;
  			}
  		}
  		read_unlock(&qdisc_mod_lock);
  	}
  	return q;
  }
8a8e3d84b   Jesper Dangaard Brouer   net_sched: restor...
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
  /* Older iproute2 versions did not transfer the linklayer setting, and
   * the rate table lookup systems have been dropped in the kernel.  To
   * stay backward compatible with older iproute2 tc utils, the linklayer
   * setting is detected by checking whether the rate table was modified.
   *
   * For linklayer ATM the rate table is aligned to 48 bytes, so some
   * table entries contain the same value.  The mpu (min packet unit) is
   * also encoded into the old rate table, so starting from the mpu we
   * find the low and high table entries that map to this cell.  If these
   * entries contain the same value, then the rate table has been modified
   * for linklayer ATM.
   *
   * This is done by rounding mpu up to a multiple of 48 bytes (one
   * cell/entry), rounding up again to the next cell, taking the table
   * entry one below that, and comparing the two entries.
   *
   * @r:    rate specification; mpu, cell_log and rate are read
   * @rtab: rate table to inspect; indices below 256 are read
   *
   * Returns TC_LINKLAYER_ATM when the two entries match, otherwise
   * TC_LINKLAYER_ETHERNET.
   */
  static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
  {
  	int low       = roundup(r->mpu, 48);
  	int high      = roundup(low+1, 48);
  	int cell_low  = low >> r->cell_log;
  	int cell_high = (high >> r->cell_log) - 1;
  
  	/* rtab is too inaccurate at rates > 100Mbit/s */
  	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
  		pr_debug("TC linklayer: Giving up ATM detection\n");
  		return TC_LINKLAYER_ETHERNET;
  	}
  
  	/* cell_high < 256 keeps both indices inside the table. */
  	if ((cell_high > cell_low) && (cell_high < 256)
  	    && (rtab[cell_low] == rtab[cell_high])) {
  		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
  			 cell_low, cell_high, rtab[cell_high]);
  		return TC_LINKLAYER_ATM;
  	}
  	return TC_LINKLAYER_ETHERNET;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
370
  static struct qdisc_rate_table *qdisc_rtab_list;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
371
  struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
372
373
  {
  	struct qdisc_rate_table *rtab;
40edeff6e   Eric Dumazet   net_sched: qdisc_...
374
375
376
  	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
  	    nla_len(tab) != TC_RTAB_SIZE)
  		return NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
377
  	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
40edeff6e   Eric Dumazet   net_sched: qdisc_...
378
379
  		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
  		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
380
381
382
383
  			rtab->refcnt++;
  			return rtab;
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
384
385
386
387
  	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
  	if (rtab) {
  		rtab->rate = *r;
  		rtab->refcnt = 1;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
388
  		memcpy(rtab->data, nla_data(tab), 1024);
8a8e3d84b   Jesper Dangaard Brouer   net_sched: restor...
389
390
  		if (r->linklayer == TC_LINKLAYER_UNAWARE)
  			r->linklayer = __detect_linklayer(r, rtab->data);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
391
392
393
394
395
  		rtab->next = qdisc_rtab_list;
  		qdisc_rtab_list = rtab;
  	}
  	return rtab;
  }
62e3ba1b5   Patrick McHardy   [NET_SCHED]: Move...
396
  EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
397
398
399
400
401
402
403
  
  void qdisc_put_rtab(struct qdisc_rate_table *tab)
  {
  	struct qdisc_rate_table *rtab, **rtabp;
  
  	if (!tab || --tab->refcnt)
  		return;
cc7ec456f   Eric Dumazet   net_sched: cleanups
404
405
406
  	for (rtabp = &qdisc_rtab_list;
  	     (rtab = *rtabp) != NULL;
  	     rtabp = &rtab->next) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
407
408
409
410
411
412
413
  		if (rtab == tab) {
  			*rtabp = rtab->next;
  			kfree(rtab);
  			return;
  		}
  	}
  }
62e3ba1b5   Patrick McHardy   [NET_SCHED]: Move...
414
  EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
415

175f9c1bb   Jussi Kivilinna   net_sched: Add si...
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
  /* List of size tables handed out by qdisc_get_stab(); entries are
   * shared between users via their refcnt.
   */
  static LIST_HEAD(qdisc_stab_list);
  /* Held around lookups, insertions and removals on qdisc_stab_list. */
  static DEFINE_SPINLOCK(qdisc_stab_lock);
  
  /* Netlink policy applied when parsing the nested size-table attribute. */
  static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
  	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
  	[TCA_STAB_DATA] = { .type = NLA_BINARY },
  };
  
  static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
  {
  	struct nlattr *tb[TCA_STAB_MAX + 1];
  	struct qdisc_size_table *stab;
  	struct tc_sizespec *s;
  	unsigned int tsize = 0;
  	u16 *tab = NULL;
  	int err;
  
  	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
  	if (err < 0)
  		return ERR_PTR(err);
  	if (!tb[TCA_STAB_BASE])
  		return ERR_PTR(-EINVAL);
  
  	s = nla_data(tb[TCA_STAB_BASE]);
  
  	if (s->tsize > 0) {
  		if (!tb[TCA_STAB_DATA])
  			return ERR_PTR(-EINVAL);
  		tab = nla_data(tb[TCA_STAB_DATA]);
  		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
  	}
00093fab9   Dan Carpenter   net/sched: remove...
447
  	if (tsize != s->tsize || (!tab && tsize > 0))
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
448
  		return ERR_PTR(-EINVAL);
f3b9605d7   David S. Miller   Revert "pkt_sched...
449
  	spin_lock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
450
451
452
453
454
455
456
  
  	list_for_each_entry(stab, &qdisc_stab_list, list) {
  		if (memcmp(&stab->szopts, s, sizeof(*s)))
  			continue;
  		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
  			continue;
  		stab->refcnt++;
f3b9605d7   David S. Miller   Revert "pkt_sched...
457
  		spin_unlock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
458
459
  		return stab;
  	}
f3b9605d7   David S. Miller   Revert "pkt_sched...
460
  	spin_unlock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
461
462
463
464
465
466
467
468
469
  
  	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
  	if (!stab)
  		return ERR_PTR(-ENOMEM);
  
  	stab->refcnt = 1;
  	stab->szopts = *s;
  	if (tsize > 0)
  		memcpy(stab->data, tab, tsize * sizeof(u16));
f3b9605d7   David S. Miller   Revert "pkt_sched...
470
  	spin_lock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
471
  	list_add_tail(&stab->list, &qdisc_stab_list);
f3b9605d7   David S. Miller   Revert "pkt_sched...
472
  	spin_unlock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
473
474
475
  
  	return stab;
  }
a2da570d6   Eric Dumazet   net_sched: RCU co...
476
477
478
479
  /* RCU callback: free the qdisc_size_table that embeds @head as its
   * rcu member.
   */
  static void stab_kfree_rcu(struct rcu_head *head)
  {
  	kfree(container_of(head, struct qdisc_size_table, rcu));
  }
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
480
481
482
483
  void qdisc_put_stab(struct qdisc_size_table *tab)
  {
  	if (!tab)
  		return;
f3b9605d7   David S. Miller   Revert "pkt_sched...
484
  	spin_lock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
485
486
487
  
  	if (--tab->refcnt == 0) {
  		list_del(&tab->list);
a2da570d6   Eric Dumazet   net_sched: RCU co...
488
  		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
489
  	}
f3b9605d7   David S. Miller   Revert "pkt_sched...
490
  	spin_unlock(&qdisc_stab_lock);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
491
492
493
494
495
496
497
498
  }
  EXPORT_SYMBOL(qdisc_put_stab);
  
  static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
  {
  	struct nlattr *nest;
  
  	nest = nla_nest_start(skb, TCA_STAB);
3aa4614da   Patrick McHardy   pkt_sched: fix mi...
499
500
  	if (nest == NULL)
  		goto nla_put_failure;
1b34ec43c   David S. Miller   pkt_sched: Stop u...
501
502
  	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
  		goto nla_put_failure;
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
503
504
505
506
507
508
509
  	nla_nest_end(skb, nest);
  
  	return skb->len;
  
  nla_put_failure:
  	return -1;
  }
a2da570d6   Eric Dumazet   net_sched: RCU co...
510
  void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
  {
  	int pkt_len, slot;
  
  	pkt_len = skb->len + stab->szopts.overhead;
  	if (unlikely(!stab->szopts.tsize))
  		goto out;
  
  	slot = pkt_len + stab->szopts.cell_align;
  	if (unlikely(slot < 0))
  		slot = 0;
  
  	slot >>= stab->szopts.cell_log;
  	if (likely(slot < stab->szopts.tsize))
  		pkt_len = stab->data[slot];
  	else
  		pkt_len = stab->data[stab->szopts.tsize - 1] *
  				(slot / stab->szopts.tsize) +
  				stab->data[slot % stab->szopts.tsize];
  
  	pkt_len <<= stab->szopts.size_log;
  out:
  	if (unlikely(pkt_len < 1))
  		pkt_len = 1;
  	qdisc_skb_cb(skb)->pkt_len = pkt_len;
  }
a2da570d6   Eric Dumazet   net_sched: RCU co...
536
  EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
537

6e765a009   Florian Westphal   net_sched: drr: w...
538
  void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
b00355db3   Jarek Poplawski   pkt_sched: sch_hf...
539
540
  {
  	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
cc7ec456f   Eric Dumazet   net_sched: cleanups
541
542
543
  		pr_warn("%s: %s qdisc %X: is non-work-conserving?
  ",
  			txt, qdisc->ops->id, qdisc->handle >> 16);
b00355db3   Jarek Poplawski   pkt_sched: sch_hf...
544
545
546
547
  		qdisc->flags |= TCQ_F_WARN_NONWC;
  	}
  }
  EXPORT_SYMBOL(qdisc_warn_nonwc);
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
548
549
550
  static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
  {
  	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da38   David S. Miller   pkt_sched: Revert...
551
  						 timer);
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
552

fd245a4ad   Eric Dumazet   net_sched: move T...
553
  	qdisc_unthrottled(wd->qdisc);
8608db031   David S. Miller   pkt_sched: Never ...
554
  	__netif_schedule(qdisc_root(wd->qdisc));
1936502d0   Stephen Hemminger   [NET_SCHED] qdisc...
555

4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
556
557
558
559
560
  	return HRTIMER_NORESTART;
  }
  
  void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
  {
2fbd3da38   David S. Miller   pkt_sched: Revert...
561
562
  	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  	wd->timer.function = qdisc_watchdog;
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
563
564
565
  	wd->qdisc = qdisc;
  }
  EXPORT_SYMBOL(qdisc_watchdog_init);
34c5d292c   Jiri Pirko   sch_api: introduc...
566
  /*
   * qdisc_watchdog_schedule_ns - arm the watchdog for an absolute time
   * @wd:      watchdog to arm
   * @expires: expiry time in nanoseconds, passed to the timer in
   *           HRTIMER_MODE_ABS
   *
   * Does nothing if the sleeping root of the watched qdisc has
   * __QDISC_STATE_DEACTIVATED set.  Otherwise the qdisc is marked throttled
   * and the timer is started.
   */
  void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
  {
  	if (test_bit(__QDISC_STATE_DEACTIVATED,
  		     &qdisc_root_sleeping(wd->qdisc)->state))
  		return;
  
  	qdisc_throttled(wd->qdisc);
  
  	hrtimer_start(&wd->timer,
  		      ns_to_ktime(expires),
  		      HRTIMER_MODE_ABS);
  }
34c5d292c   Jiri Pirko   sch_api: introduc...
577
  EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
578
579
580
  
  void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
  {
2fbd3da38   David S. Miller   pkt_sched: Revert...
581
  	hrtimer_cancel(&wd->timer);
fd245a4ad   Eric Dumazet   net_sched: move T...
582
  	qdisc_unthrottled(wd->qdisc);
4179477f6   Patrick McHardy   [NET_SCHED]: Add ...
583
584
  }
  EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
585

a94f779f9   Adrian Bunk   pkt_sched: make q...
586
  static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a55   Patrick McHardy   net-sched: add dy...
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
  {
  	unsigned int size = n * sizeof(struct hlist_head), i;
  	struct hlist_head *h;
  
  	if (size <= PAGE_SIZE)
  		h = kmalloc(size, GFP_KERNEL);
  	else
  		h = (struct hlist_head *)
  			__get_free_pages(GFP_KERNEL, get_order(size));
  
  	if (h != NULL) {
  		for (i = 0; i < n; i++)
  			INIT_HLIST_HEAD(&h[i]);
  	}
  	return h;
  }
  
  /*
   * Release a bucket array of @n heads.  The byte size decides which
   * release routine is used: free_pages() above PAGE_SIZE, kfree()
   * otherwise.
   */
  static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
  {
  	unsigned int bytes = n * sizeof(struct hlist_head);
  
  	if (bytes > PAGE_SIZE) {
  		free_pages((unsigned long)h, get_order(bytes));
  		return;
  	}
  
  	kfree(h);
  }
  
  void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
  {
  	struct Qdisc_class_common *cl;
b67bfe0d4   Sasha Levin   hlist: drop the n...
617
  	struct hlist_node *next;
6fe1c7a55   Patrick McHardy   net-sched: add dy...
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
  	struct hlist_head *nhash, *ohash;
  	unsigned int nsize, nmask, osize;
  	unsigned int i, h;
  
  	/* Rehash when load factor exceeds 0.75 */
  	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
  		return;
  	nsize = clhash->hashsize * 2;
  	nmask = nsize - 1;
  	nhash = qdisc_class_hash_alloc(nsize);
  	if (nhash == NULL)
  		return;
  
  	ohash = clhash->hash;
  	osize = clhash->hashsize;
  
  	sch_tree_lock(sch);
  	for (i = 0; i < osize; i++) {
b67bfe0d4   Sasha Levin   hlist: drop the n...
636
  		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
6fe1c7a55   Patrick McHardy   net-sched: add dy...
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
  			h = qdisc_class_hash(cl->classid, nmask);
  			hlist_add_head(&cl->hnode, &nhash[h]);
  		}
  	}
  	clhash->hash     = nhash;
  	clhash->hashsize = nsize;
  	clhash->hashmask = nmask;
  	sch_tree_unlock(sch);
  
  	qdisc_class_hash_free(ohash, osize);
  }
  EXPORT_SYMBOL(qdisc_class_hash_grow);
  
  /*
   * qdisc_class_hash_init - set up an empty class hash with four buckets
   * @clhash: hash descriptor to initialise
   *
   * Returns 0 on success or -ENOMEM if the bucket array cannot be
   * allocated (clhash->hash is NULL in that case).
   */
  int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
  {
  	const unsigned int initial_buckets = 4;
  
  	clhash->hash = qdisc_class_hash_alloc(initial_buckets);
  	if (!clhash->hash)
  		return -ENOMEM;
  
  	clhash->hashelems = 0;
  	clhash->hashmask  = initial_buckets - 1;
  	clhash->hashsize  = initial_buckets;
  	return 0;
  }
  EXPORT_SYMBOL(qdisc_class_hash_init);
  
  /* Free the bucket array of @clhash, sized by its current hashsize. */
  void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
  {
  	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
  }
  EXPORT_SYMBOL(qdisc_class_hash_destroy);
  
  /*
   * qdisc_class_hash_insert - add class @cl to @clhash
   *
   * The bucket is chosen from cl->classid and the current hash mask; the
   * element count is incremented.
   */
  void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
  			     struct Qdisc_class_common *cl)
  {
  	struct hlist_head *bucket;
  
  	INIT_HLIST_NODE(&cl->hnode);
  	bucket = &clhash->hash[qdisc_class_hash(cl->classid, clhash->hashmask)];
  	hlist_add_head(&cl->hnode, bucket);
  	clhash->hashelems += 1;
  }
  EXPORT_SYMBOL(qdisc_class_hash_insert);
  
  /* Unlink class @cl from its hash chain and decrement the element count
   * of @clhash.
   */
  void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
  			     struct Qdisc_class_common *cl)
  {
  	hlist_del(&cl->hnode);
  	clhash->hashelems--;
  }
  EXPORT_SYMBOL(qdisc_class_hash_remove);
fa0f5aa74   Eric Dumazet   net_sched: qdisc_...
689
690
691
  /* Allocate a unique handle from the space managed by the kernel.
   * Possible range is [8000-FFFF]:0000 (0x8000 values)
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
692
693
  static u32 qdisc_alloc_handle(struct net_device *dev)
  {
fa0f5aa74   Eric Dumazet   net_sched: qdisc_...
694
  	int i = 0x8000;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
695
696
697
698
699
700
  	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
  
  	do {
  		autohandle += TC_H_MAKE(0x10000U, 0);
  		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
  			autohandle = TC_H_MAKE(0x80000000U, 0);
fa0f5aa74   Eric Dumazet   net_sched: qdisc_...
701
702
703
704
  		if (!qdisc_lookup(dev, autohandle))
  			return autohandle;
  		cond_resched();
  	} while	(--i > 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
705

fa0f5aa74   Eric Dumazet   net_sched: qdisc_...
706
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
707
  }
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
708
709
  void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
  {
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
710
  	const struct Qdisc_class_ops *cops;
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
711
712
  	unsigned long cl;
  	u32 parentid;
2c8c8e6f9   Eric Dumazet   net_sched: increm...
713
  	int drops;
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
714
715
716
  
  	if (n == 0)
  		return;
2c8c8e6f9   Eric Dumazet   net_sched: increm...
717
  	drops = max_t(int, n, 0);
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
718
  	while ((parentid = sch->parent)) {
066a3b5b2   Jarek Poplawski   [NET_SCHED] sch_a...
719
720
  		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
  			return;
5ce2d488f   David S. Miller   pkt_sched: Remove...
721
  		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefaf   Patrick McHardy   [NET]: Fix sch_ap...
722
723
724
725
  		if (sch == NULL) {
  			WARN_ON(parentid != TC_H_ROOT);
  			return;
  		}
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
726
727
728
729
730
731
732
  		cops = sch->ops->cl_ops;
  		if (cops->qlen_notify) {
  			cl = cops->get(sch, parentid);
  			cops->qlen_notify(sch, cl);
  			cops->put(sch, cl);
  		}
  		sch->q.qlen -= n;
2c8c8e6f9   Eric Dumazet   net_sched: increm...
733
  		sch->qstats.drops += drops;
43effa1e5   Patrick McHardy   [NET_SCHED]: Fix ...
734
735
736
  	}
  }
  EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
737

7316ae88c   Tom Goff   net_sched: make t...
738
739
  /*
   * Send a qdisc notification when at least one of @old and @new is set,
   * then destroy @old if there is one.  The result of qdisc_notify() is
   * not checked.
   */
  static void notify_and_destroy(struct net *net, struct sk_buff *skb,
  			       struct nlmsghdr *n, u32 clid,
  			       struct Qdisc *old, struct Qdisc *new)
  {
  	if (new || old)
  		qdisc_notify(net, skb, n, clid, old, new);
  
  	if (old)
  		qdisc_destroy(old);
  }
  
  /*
   * Graft qdisc "new" either below class "classid" of qdisc "parent" or,
   * when "parent" is NULL, directly onto device "dev".
   *
   * With a parent, the work is delegated to the parent's class ops:
   * -EOPNOTSUPP is returned when there is no graft operation and -ENOENT
   * when the class cannot be found.
   *
   * Without a parent, the device is deactivated while it is up, "new" is
   * installed on the ingress queue (if either qdisc carries
   * TCQ_F_INGRESS) or on every tx queue, and the device is activated again.
   * A qdisc providing an attach operation installs itself instead.
   * -ENOENT is returned when an ingress graft is requested but the device
   * has no ingress queue.
   *
   * On success a netlink notification is sent using "skb" and "n" where
   * appropriate, and the old qdisc is destroyed.
   */
  static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
  		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
  		       struct Qdisc *new, struct Qdisc *old)
  {
  	struct net *net = dev_net(dev);
  	unsigned int nr_queues, idx, is_ingress;
  	int err;

  	if (parent) {
  		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
  		unsigned long cl;

  		if (!cops || !cops->graft)
  			return -EOPNOTSUPP;

  		cl = cops->get(parent, classid);
  		if (!cl)
  			return -ENOENT;

  		err = cops->graft(parent, cl, new, &old);
  		cops->put(parent, cl);
  		if (err)
  			return err;

  		notify_and_destroy(net, skb, n, classid, old, new);
  		return 0;
  	}

  	/* No parent qdisc: operate on the device's own queues. */
  	nr_queues = dev->num_tx_queues;
  	is_ingress = 0;
  	if ((old && old->flags & TCQ_F_INGRESS) ||
  	    (new && new->flags & TCQ_F_INGRESS)) {
  		if (!dev_ingress_queue(dev))
  			return -ENOENT;
  		nr_queues = 1;
  		is_ingress = 1;
  	}

  	if (dev->flags & IFF_UP)
  		dev_deactivate(dev);

  	/* A qdisc with its own attach op takes care of the queues itself. */
  	if (new && new->ops->attach) {
  		new->ops->attach(new);
  		nr_queues = 0;
  	}

  	for (idx = 0; idx < nr_queues; idx++) {
  		struct netdev_queue *target;

  		target = is_ingress ? dev_ingress_queue(dev)
  				    : netdev_get_tx_queue(dev, idx);

  		old = dev_graft_qdisc(target, new);

  		/* One extra reference for every queue after the first. */
  		if (new && idx > 0)
  			atomic_inc(&new->refcnt);

  		if (!is_ingress)
  			qdisc_destroy(old);
  	}

  	if (is_ingress) {
  		notify_and_destroy(net, skb, n, classid, old, new);
  	} else {
  		notify_and_destroy(net, skb, n, classid, dev->qdisc, new);
  		if (new && !new->ops->attach)
  			atomic_inc(&new->refcnt);
  		dev->qdisc = new ? new : &noop_qdisc;
  	}

  	if (dev->flags & IFF_UP)
  		dev_activate(dev);

  	return 0;
  }
25bfcd5a7   Jarek Poplawski   pkt_sched: Add lo...
826
827
828
  /*
   * Lockdep classes that qdisc_create() assigns to a new qdisc's lock:
   * qdisc_rx_lock for ingress qdiscs, qdisc_tx_lock for all others.
   * The lockdep annotation is needed for ingress; egress gets it only
   * for name.
   */
  static struct lock_class_key qdisc_tx_lock;
  static struct lock_class_key qdisc_rx_lock;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
829
830
831
832
833
834
835
  /*
     Allocate and initialize new qdisc.
  
     Parameters are passed via opt.
   */
  
  static struct Qdisc *
bb949fbd1   David S. Miller   netdev: Create ne...
836
  qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
23bcf634c   Patrick McHardy   net_sched: fix es...
837
838
  	     struct Qdisc *p, u32 parent, u32 handle,
  	     struct nlattr **tca, int *errp)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
839
840
  {
  	int err;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
841
  	struct nlattr *kind = tca[TCA_KIND];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
842
843
  	struct Qdisc *sch;
  	struct Qdisc_ops *ops;
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
844
  	struct qdisc_size_table *stab;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
845
846
  
  	ops = qdisc_lookup_ops(kind);
95a5afca4   Johannes Berg   net: Remove CONFI...
847
  #ifdef CONFIG_MODULES
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
848
849
  	if (ops == NULL && kind != NULL) {
  		char name[IFNAMSIZ];
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
850
  		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
  			/* We dropped the RTNL semaphore in order to
  			 * perform the module load.  So, even if we
  			 * succeeded in loading the module we have to
  			 * tell the caller to replay the request.  We
  			 * indicate this using -EAGAIN.
  			 * We replay the request because the device may
  			 * go away in the mean time.
  			 */
  			rtnl_unlock();
  			request_module("sch_%s", name);
  			rtnl_lock();
  			ops = qdisc_lookup_ops(kind);
  			if (ops != NULL) {
  				/* We will try again qdisc_lookup_ops,
  				 * so don't keep a reference.
  				 */
  				module_put(ops->owner);
  				err = -EAGAIN;
  				goto err_out;
  			}
  		}
  	}
  #endif
b9e2cc0f0   Jamal Hadi Salim   [PKT_SCHED]: Retu...
874
  	err = -ENOENT;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
875
876
  	if (ops == NULL)
  		goto err_out;
5ce2d488f   David S. Miller   pkt_sched: Remove...
877
  	sch = qdisc_alloc(dev_queue, ops);
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
878
879
  	if (IS_ERR(sch)) {
  		err = PTR_ERR(sch);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
880
  		goto err_out2;
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
881
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
882

ffc8fefaf   Patrick McHardy   [NET]: Fix sch_ap...
883
  	sch->parent = parent;
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
884
  	if (handle == TC_H_INGRESS) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
885
  		sch->flags |= TCQ_F_INGRESS;
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
886
  		handle = TC_H_MAKE(TC_H_INGRESS, 0);
25bfcd5a7   Jarek Poplawski   pkt_sched: Add lo...
887
  		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
fd44de7cc   Patrick McHardy   [NET_SCHED]: ingr...
888
  	} else {
fd44de7cc   Patrick McHardy   [NET_SCHED]: ingr...
889
890
891
892
893
894
  		if (handle == 0) {
  			handle = qdisc_alloc_handle(dev);
  			err = -ENOMEM;
  			if (handle == 0)
  				goto err_out3;
  		}
25bfcd5a7   Jarek Poplawski   pkt_sched: Add lo...
895
  		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1abbe1394   Eric Dumazet   pkt_sched: avoid ...
896
897
  		if (!netif_is_multiqueue(dev))
  			sch->flags |= TCQ_F_ONETXQUEUE;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
898
  	}
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
899
  	sch->handle = handle;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
900

1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
901
  	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
902
903
904
905
  		if (tca[TCA_STAB]) {
  			stab = qdisc_get_stab(tca[TCA_STAB]);
  			if (IS_ERR(stab)) {
  				err = PTR_ERR(stab);
7c64b9f3f   Jarek Poplawski   pkt_sched: Fix qd...
906
  				goto err_out4;
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
907
  			}
a2da570d6   Eric Dumazet   net_sched: RCU co...
908
  			rcu_assign_pointer(sch->stab, stab);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
909
  		}
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
910
  		if (tca[TCA_RATE]) {
f6f9b93f1   Jarek Poplawski   pkt_sched: Fix ge...
911
  			spinlock_t *root_lock;
23bcf634c   Patrick McHardy   net_sched: fix es...
912
913
914
  			err = -EOPNOTSUPP;
  			if (sch->flags & TCQ_F_MQROOT)
  				goto err_out4;
f6f9b93f1   Jarek Poplawski   pkt_sched: Fix ge...
915
  			if ((sch->parent != TC_H_ROOT) &&
23bcf634c   Patrick McHardy   net_sched: fix es...
916
917
  			    !(sch->flags & TCQ_F_INGRESS) &&
  			    (!p || !(p->flags & TCQ_F_MQROOT)))
f6f9b93f1   Jarek Poplawski   pkt_sched: Fix ge...
918
919
920
  				root_lock = qdisc_root_sleeping_lock(sch);
  			else
  				root_lock = qdisc_lock(sch);
023e09a76   Thomas Graf   [PKT_SCHED]: Repo...
921
  			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
f6f9b93f1   Jarek Poplawski   pkt_sched: Fix ge...
922
  						root_lock, tca[TCA_RATE]);
23bcf634c   Patrick McHardy   net_sched: fix es...
923
924
  			if (err)
  				goto err_out4;
023e09a76   Thomas Graf   [PKT_SCHED]: Repo...
925
  		}
f6e0b239a   Jarek Poplawski   pkt_sched: Fix qd...
926
927
  
  		qdisc_list_add(sch);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
928

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
929
930
931
932
  		return sch;
  	}
  err_out3:
  	dev_put(dev);
3d54b82fd   Thomas Graf   [PKT_SCHED]: Clea...
933
  	kfree((char *) sch - sch->padded);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
934
935
936
937
  err_out2:
  	module_put(ops->owner);
  err_out:
  	*errp = err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
938
  	return NULL;
23bcf634c   Patrick McHardy   net_sched: fix es...
939
940
941
942
943
944
  
  err_out4:
  	/*
  	 * Any broken qdiscs that would require a ops->reset() here?
  	 * The qdisc was never in action so it shouldn't be necessary.
  	 */
a2da570d6   Eric Dumazet   net_sched: RCU co...
945
  	qdisc_put_stab(rtnl_dereference(sch->stab));
23bcf634c   Patrick McHardy   net_sched: fix es...
946
947
948
  	if (ops->destroy)
  		ops->destroy(sch);
  	goto err_out3;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949
  }
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
950
  static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
951
  {
a2da570d6   Eric Dumazet   net_sched: RCU co...
952
  	struct qdisc_size_table *ostab, *stab = NULL;
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
953
  	int err = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
954

175f9c1bb   Jussi Kivilinna   net_sched: Add si...
955
  	if (tca[TCA_OPTIONS]) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
956
957
  		if (sch->ops->change == NULL)
  			return -EINVAL;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
958
  		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
960
961
  		if (err)
  			return err;
  	}
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
962
963
964
965
966
967
  
  	if (tca[TCA_STAB]) {
  		stab = qdisc_get_stab(tca[TCA_STAB]);
  		if (IS_ERR(stab))
  			return PTR_ERR(stab);
  	}
a2da570d6   Eric Dumazet   net_sched: RCU co...
968
969
970
  	ostab = rtnl_dereference(sch->stab);
  	rcu_assign_pointer(sch->stab, stab);
  	qdisc_put_stab(ostab);
175f9c1bb   Jussi Kivilinna   net_sched: Add si...
971

23bcf634c   Patrick McHardy   net_sched: fix es...
972
  	if (tca[TCA_RATE]) {
71bcb09a5   Stephen Hemminger   tc: check for err...
973
974
  		/* NB: ignores errors from replace_estimator
  		   because change can't be undone. */
23bcf634c   Patrick McHardy   net_sched: fix es...
975
976
  		if (sch->flags & TCQ_F_MQROOT)
  			goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
977
  		gen_replace_estimator(&sch->bstats, &sch->rate_est,
71bcb09a5   Stephen Hemminger   tc: check for err...
978
979
  					    qdisc_root_sleeping_lock(sch),
  					    tca[TCA_RATE]);
23bcf634c   Patrick McHardy   net_sched: fix es...
980
981
  	}
  out:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
982
983
  	return 0;
  }
cc7ec456f   Eric Dumazet   net_sched: cleanups
984
985
  /*
   * Walker state for check_loop().  The walker must stay the first member:
   * check_loop_fn() casts the walker pointer back to this structure.
   */
  struct check_loop_arg {
  	struct qdisc_walker	w;	/* embedded class walker */
  	struct Qdisc		*p;	/* qdisc that must not be found as a leaf */
  	int			depth;	/* current recursion depth */
  };
  
  static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

  /*
   * Walk the classes of "q" looking for "p" among the leaf qdiscs.
   *
   * Returns 0 when "q" has no class operations or the walk ran to
   * completion without setting the walker's stop field, -ELOOP otherwise.
   */
  static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
  {
  	struct check_loop_arg	arg;

  	if (!q->ops->cl_ops)
  		return 0;

  	arg.w.fn = check_loop_fn;
  	arg.w.count = 0;
  	arg.w.skip = 0;
  	arg.w.stop = 0;
  	arg.p = p;
  	arg.depth = depth;

  	q->ops->cl_ops->walk(q, &arg.w);

  	if (arg.w.stop)
  		return -ELOOP;
  	return 0;
  }
  
  static int
  check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
  {
  	struct Qdisc *leaf;
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
1011
  	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
  	struct check_loop_arg *arg = (struct check_loop_arg *)w;
  
  	leaf = cops->leaf(q, cl);
  	if (leaf) {
  		if (leaf == arg->p || arg->depth > 7)
  			return -ELOOP;
  		return check_loop(leaf, arg->p, arg->depth + 1);
  	}
  	return 0;
  }
  
  /*
   * Delete/get qdisc.
   */
661d2967b   Thomas Graf   rtnetlink: Remove...
1026
  static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1027
  {
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
1028
  	struct net *net = sock_net(skb->sk);
02ef22ca4   David S. Miller   pkt_sched: sch_ap...
1029
  	struct tcmsg *tcm = nlmsg_data(n);
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1030
  	struct nlattr *tca[TCA_MAX + 1];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1031
  	struct net_device *dev;
de179c8c1   Hong zhi guo   netlink: have len...
1032
  	u32 clid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1033
1034
1035
  	struct Qdisc *q = NULL;
  	struct Qdisc *p = NULL;
  	int err;
4e8bbb819   Stéphane Graber   net: Allow tc cha...
1036
  	if ((n->nlmsg_type != RTM_GETQDISC) &&
5f013c9bc   David S. Miller   Merge git://git.k...
1037
  	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef86   Eric W. Biederman   net: Push capable...
1038
  		return -EPERM;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1039
1040
1041
  	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
  	if (err < 0)
  		return err;
de179c8c1   Hong zhi guo   netlink: have len...
1042
1043
1044
1045
1046
  	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
  	if (!dev)
  		return -ENODEV;
  
  	clid = tcm->tcm_parent;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1047
1048
1049
  	if (clid) {
  		if (clid != TC_H_ROOT) {
  			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1050
1051
  				p = qdisc_lookup(dev, TC_H_MAJ(clid));
  				if (!p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1052
1053
  					return -ENOENT;
  				q = qdisc_leaf(p, clid);
cc7ec456f   Eric Dumazet   net_sched: cleanups
1054
1055
  			} else if (dev_ingress_queue(dev)) {
  				q = dev_ingress_queue(dev)->qdisc_sleeping;
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
1056
  			}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
  		} else {
af356afa0   Patrick McHardy   net_sched: reintr...
1058
  			q = dev->qdisc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1059
1060
1061
1062
1063
1064
1065
  		}
  		if (!q)
  			return -ENOENT;
  
  		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
  			return -EINVAL;
  	} else {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1066
1067
  		q = qdisc_lookup(dev, tcm->tcm_handle);
  		if (!q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1068
1069
  			return -ENOENT;
  	}
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1070
  	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1071
1072
1073
1074
1075
1076
1077
  		return -EINVAL;
  
  	if (n->nlmsg_type == RTM_DELQDISC) {
  		if (!clid)
  			return -EINVAL;
  		if (q->handle == 0)
  			return -ENOENT;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1078
1079
  		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
  		if (err != 0)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1080
  			return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1081
  	} else {
7316ae88c   Tom Goff   net_sched: make t...
1082
  		qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1083
1084
1085
1086
1087
  	}
  	return 0;
  }
  
  /*
cc7ec456f   Eric Dumazet   net_sched: cleanups
1088
   * Create/change qdisc.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
   */
661d2967b   Thomas Graf   rtnetlink: Remove...
1090
  static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1091
  {
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
1092
  	struct net *net = sock_net(skb->sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
  	struct tcmsg *tcm;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1094
  	struct nlattr *tca[TCA_MAX + 1];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1095
1096
1097
1098
  	struct net_device *dev;
  	u32 clid;
  	struct Qdisc *q, *p;
  	int err;
5f013c9bc   David S. Miller   Merge git://git.k...
1099
  	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef86   Eric W. Biederman   net: Push capable...
1100
  		return -EPERM;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1101
1102
  replay:
  	/* Reinit, just in case something touches this. */
de179c8c1   Hong zhi guo   netlink: have len...
1103
1104
1105
  	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
  	if (err < 0)
  		return err;
02ef22ca4   David S. Miller   pkt_sched: sch_ap...
1106
  	tcm = nlmsg_data(n);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1107
1108
  	clid = tcm->tcm_parent;
  	q = p = NULL;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1109
1110
  	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
  	if (!dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1111
  		return -ENODEV;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1112

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1113
1114
1115
  	if (clid) {
  		if (clid != TC_H_ROOT) {
  			if (clid != TC_H_INGRESS) {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1116
1117
  				p = qdisc_lookup(dev, TC_H_MAJ(clid));
  				if (!p)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1118
1119
  					return -ENOENT;
  				q = qdisc_leaf(p, clid);
cc7ec456f   Eric Dumazet   net_sched: cleanups
1120
1121
  			} else if (dev_ingress_queue_create(dev)) {
  				q = dev_ingress_queue(dev)->qdisc_sleeping;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1122
1123
  			}
  		} else {
af356afa0   Patrick McHardy   net_sched: reintr...
1124
  			q = dev->qdisc;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
1126
1127
1128
1129
1130
1131
1132
  		}
  
  		/* It may be default qdisc, ignore it */
  		if (q && q->handle == 0)
  			q = NULL;
  
  		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
  			if (tcm->tcm_handle) {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1133
  				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1134
1135
1136
  					return -EEXIST;
  				if (TC_H_MIN(tcm->tcm_handle))
  					return -EINVAL;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1137
1138
  				q = qdisc_lookup(dev, tcm->tcm_handle);
  				if (!q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1139
  					goto create_n_graft;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1140
  				if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1141
  					return -EEXIST;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1142
  				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143
1144
1145
1146
1147
1148
1149
  					return -EINVAL;
  				if (q == p ||
  				    (p && check_loop(q, p, 0)))
  					return -ELOOP;
  				atomic_inc(&q->refcnt);
  				goto graft;
  			} else {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1150
  				if (!q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
  					goto create_n_graft;
  
  				/* This magic test requires explanation.
  				 *
  				 *   We know, that some child q is already
  				 *   attached to this parent and have choice:
  				 *   either to change it or to create/graft new one.
  				 *
  				 *   1. We are allowed to create/graft only
  				 *   if CREATE and REPLACE flags are set.
  				 *
  				 *   2. If EXCL is set, requestor wanted to say,
  				 *   that qdisc tcm_handle is not expected
  				 *   to exist, so that we choose create/graft too.
  				 *
  				 *   3. The last case is when no flags are set.
  				 *   Alas, it is sort of hole in API, we
  				 *   cannot decide what to do unambiguously.
  				 *   For now we select create/graft, if
  				 *   user gave KIND, which does not match existing.
  				 */
cc7ec456f   Eric Dumazet   net_sched: cleanups
1172
1173
1174
  				if ((n->nlmsg_flags & NLM_F_CREATE) &&
  				    (n->nlmsg_flags & NLM_F_REPLACE) &&
  				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1175
1176
  				     (tca[TCA_KIND] &&
  				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  					goto create_n_graft;
  			}
  		}
  	} else {
  		if (!tcm->tcm_handle)
  			return -EINVAL;
  		q = qdisc_lookup(dev, tcm->tcm_handle);
  	}
  
  	/* Change qdisc parameters */
  	if (q == NULL)
  		return -ENOENT;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1189
  	if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1190
  		return -EEXIST;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1191
  	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1192
1193
1194
  		return -EINVAL;
  	err = qdisc_change(q, tca);
  	if (err == 0)
7316ae88c   Tom Goff   net_sched: make t...
1195
  		qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1196
1197
1198
  	return err;
  
  create_n_graft:
cc7ec456f   Eric Dumazet   net_sched: cleanups
1199
  	if (!(n->nlmsg_flags & NLM_F_CREATE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1200
  		return -ENOENT;
24824a09e   Eric Dumazet   net: dynamic ingr...
1201
1202
1203
1204
1205
1206
1207
1208
  	if (clid == TC_H_INGRESS) {
  		if (dev_ingress_queue(dev))
  			q = qdisc_create(dev, dev_ingress_queue(dev), p,
  					 tcm->tcm_parent, tcm->tcm_parent,
  					 tca, &err);
  		else
  			err = -ENOENT;
  	} else {
926e61b7c   Jarek Poplawski   pkt_sched: Fix tx...
1209
  		struct netdev_queue *dev_queue;
6ec1c69a8   David S. Miller   net_sched: add cl...
1210
1211
  
  		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
926e61b7c   Jarek Poplawski   pkt_sched: Fix tx...
1212
1213
1214
1215
1216
  			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
  		else if (p)
  			dev_queue = p->dev_queue;
  		else
  			dev_queue = netdev_get_tx_queue(dev, 0);
6ec1c69a8   David S. Miller   net_sched: add cl...
1217

926e61b7c   Jarek Poplawski   pkt_sched: Fix tx...
1218
  		q = qdisc_create(dev, dev_queue, p,
bb949fbd1   David S. Miller   netdev: Create ne...
1219
  				 tcm->tcm_parent, tcm->tcm_handle,
ffc8fefaf   Patrick McHardy   [NET]: Fix sch_ap...
1220
  				 tca, &err);
6ec1c69a8   David S. Miller   net_sched: add cl...
1221
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1222
1223
1224
1225
1226
1227
1228
  	if (q == NULL) {
  		if (err == -EAGAIN)
  			goto replay;
  		return err;
  	}
  
  graft:
e5befbd95   Ilpo Järvinen   pkt_sched: remove...
1229
1230
1231
1232
1233
  	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
  	if (err) {
  		if (q)
  			qdisc_destroy(q);
  		return err;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1234
  	}
e5befbd95   Ilpo Järvinen   pkt_sched: remove...
1235

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1236
1237
1238
1239
  	return 0;
  }
  
  /*
   * Append one netlink message describing qdisc "q" to "skb".
   *
   * The message carries a tcmsg header, the qdisc kind, whatever the
   * qdisc's dump operation adds, the size table (if any) and the
   * statistics.
   *
   * Returns skb->len on success.  On failure the skb is trimmed back to
   * its length on entry and -1 is returned.
   */
  static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
  			 u32 portid, u32 seq, u16 flags, int event)
  {
  	unsigned char *start = skb_tail_pointer(skb);
  	struct qdisc_size_table *stab;
  	struct nlmsghdr *nlh;
  	struct gnet_dump d;
  	struct tcmsg *tcm;

  	cond_resched();

  	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
  	if (!nlh)
  		goto nla_put_failure;

  	tcm = nlmsg_data(nlh);
  	tcm->tcm_family = AF_UNSPEC;
  	tcm->tcm__pad1 = 0;
  	tcm->tcm__pad2 = 0;
  	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
  	tcm->tcm_parent = clid;
  	tcm->tcm_handle = q->handle;
  	tcm->tcm_info = atomic_read(&q->refcnt);

  	if (nla_put_string(skb, TCA_KIND, q->ops->id))
  		goto nla_put_failure;
  	if (q->ops->dump && q->ops->dump(q, skb) < 0)
  		goto nla_put_failure;

  	/* Refresh the reported queue length before the stats are copied. */
  	q->qstats.qlen = q->q.qlen;

  	stab = rtnl_dereference(q->stab);
  	if (stab && qdisc_dump_stab(skb, stab) < 0)
  		goto nla_put_failure;

  	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
  					 qdisc_root_sleeping_lock(q), &d) < 0)
  		goto nla_put_failure;

  	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
  		goto nla_put_failure;

  	if (gnet_stats_copy_basic(&d, &q->bstats) < 0)
  		goto nla_put_failure;
  	if (gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0)
  		goto nla_put_failure;
  	if (gnet_stats_copy_queue(&d, &q->qstats) < 0)
  		goto nla_put_failure;

  	if (gnet_stats_finish_copy(&d) < 0)
  		goto nla_put_failure;

  	nlh->nlmsg_len = skb_tail_pointer(skb) - start;
  	return skb->len;

  nla_put_failure:
  	nlmsg_trim(skb, start);
  	return -1;
  }
53b0f0804   Eric Dumazet   net_sched: Fix qd...
1290
1291
1292
1293
  /* Qdiscs flagged TCQ_F_BUILTIN are left out of dumps and notifications. */
  static bool tc_qdisc_dump_ignore(struct Qdisc *q)
  {
  	return (q->flags & TCQ_F_BUILTIN) != 0;
  }
7316ae88c   Tom Goff   net_sched: make t...
1294
1295
1296
  /*
   * Build and send a netlink notification about a qdisc change.
   *
   * "old", when set and not ignored for dumps, is reported with
   * RTM_DELQDISC; "new" likewise with RTM_NEWQDISC, carrying NLM_F_REPLACE
   * when "old" is set.
   *
   * Returns -ENOBUFS when no skb can be allocated, -EINVAL when filling a
   * message fails or nothing was written, and otherwise the result of
   * rtnetlink_send().
   */
  static int qdisc_notify(struct net *net, struct sk_buff *oskb,
  			struct nlmsghdr *n, u32 clid,
  			struct Qdisc *old, struct Qdisc *new)
  {
  	struct sk_buff *skb;
  	u32 portid = 0;

  	if (oskb)
  		portid = NETLINK_CB(oskb).portid;

  	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  	if (!skb)
  		return -ENOBUFS;

  	if (old && !tc_qdisc_dump_ignore(old) &&
  	    tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
  			  0, RTM_DELQDISC) < 0)
  		goto err_out;

  	if (new && !tc_qdisc_dump_ignore(new) &&
  	    tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
  			  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
  		goto err_out;

  	if (skb->len)
  		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
  				      n->nlmsg_flags & NLM_F_ECHO);

  err_out:
  	kfree_skb(skb);
  	return -EINVAL;
  }
307236730   David S. Miller   pkt_sched: Manage...
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
  static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
  			      struct netlink_callback *cb,
  			      int *q_idx_p, int s_q_idx)
  {
  	int ret = 0, q_idx = *q_idx_p;
  	struct Qdisc *q;
  
  	if (!root)
  		return 0;
  
  	q = root;
  	if (q_idx < s_q_idx) {
  		q_idx++;
  	} else {
  		if (!tc_qdisc_dump_ignore(q) &&
15e473046   Eric W. Biederman   netlink: Rename p...
1338
  		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
307236730   David S. Miller   pkt_sched: Manage...
1339
1340
1341
1342
1343
1344
1345
1346
1347
  				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
  			goto done;
  		q_idx++;
  	}
  	list_for_each_entry(q, &root->list, list) {
  		if (q_idx < s_q_idx) {
  			q_idx++;
  			continue;
  		}
cc7ec456f   Eric Dumazet   net_sched: cleanups
1348
  		if (!tc_qdisc_dump_ignore(q) &&
15e473046   Eric W. Biederman   netlink: Rename p...
1349
  		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
307236730   David S. Miller   pkt_sched: Manage...
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
  				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
  			goto done;
  		q_idx++;
  	}
  
  out:
  	*q_idx_p = q_idx;
  	return ret;
  done:
  	ret = -1;
  	goto out;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1362
1363
  static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
  {
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
1364
  	struct net *net = sock_net(skb->sk);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1365
1366
1367
  	int idx, q_idx;
  	int s_idx, s_q_idx;
  	struct net_device *dev;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1368
1369
1370
  
  	s_idx = cb->args[0];
  	s_q_idx = q_idx = cb->args[1];
f1e9016da   stephen hemminger   net: use rcu for ...
1371

7562f876c   Pavel Emelianov   [NET]: Rework dev...
1372
  	idx = 0;
15dc36ebb   Eric Dumazet   pkt_sched: do not...
1373
1374
  	ASSERT_RTNL();
  	for_each_netdev(net, dev) {
307236730   David S. Miller   pkt_sched: Manage...
1375
  		struct netdev_queue *dev_queue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1376
  		if (idx < s_idx)
7562f876c   Pavel Emelianov   [NET]: Rework dev...
1377
  			goto cont;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1378
1379
  		if (idx > s_idx)
  			s_q_idx = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1380
  		q_idx = 0;
307236730   David S. Miller   pkt_sched: Manage...
1381

af356afa0   Patrick McHardy   net_sched: reintr...
1382
  		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
307236730   David S. Miller   pkt_sched: Manage...
1383
  			goto done;
24824a09e   Eric Dumazet   net: dynamic ingr...
1384
1385
1386
1387
  		dev_queue = dev_ingress_queue(dev);
  		if (dev_queue &&
  		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
  				       &q_idx, s_q_idx) < 0)
307236730   David S. Miller   pkt_sched: Manage...
1388
  			goto done;
7562f876c   Pavel Emelianov   [NET]: Rework dev...
1389
1390
  cont:
  		idx++;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1391
1392
1393
  	}
  
  done:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
  	cb->args[0] = idx;
  	cb->args[1] = q_idx;
  
  	return skb->len;
  }
  
  
  
  /************************************************
   *	Traffic classes manipulation.		*
   ************************************************/
661d2967b   Thomas Graf   rtnetlink: Remove...
1405
  static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1406
  {
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
1407
  	struct net *net = sock_net(skb->sk);
02ef22ca4   David S. Miller   pkt_sched: sch_ap...
1408
  	struct tcmsg *tcm = nlmsg_data(n);
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1409
  	struct nlattr *tca[TCA_MAX + 1];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1410
1411
  	struct net_device *dev;
  	struct Qdisc *q = NULL;
20fea08b5   Eric Dumazet   [NET]: Move Qdisc...
1412
  	const struct Qdisc_class_ops *cops;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1413
1414
  	unsigned long cl = 0;
  	unsigned long new_cl;
de179c8c1   Hong zhi guo   netlink: have len...
1415
1416
1417
  	u32 portid;
  	u32 clid;
  	u32 qid;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1418
  	int err;
4e8bbb819   Stéphane Graber   net: Allow tc cha...
1419
  	if ((n->nlmsg_type != RTM_GETTCLASS) &&
5f013c9bc   David S. Miller   Merge git://git.k...
1420
  	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef86   Eric W. Biederman   net: Push capable...
1421
  		return -EPERM;
1e90474c3   Patrick McHardy   [NET_SCHED]: Conv...
1422
1423
1424
  	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
  	if (err < 0)
  		return err;
de179c8c1   Hong zhi guo   netlink: have len...
1425
1426
1427
  	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
  	if (!dev)
  		return -ENODEV;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
  	/*
  	   parent == TC_H_UNSPEC - unspecified parent.
  	   parent == TC_H_ROOT   - class is root, which has no parent.
  	   parent == X:0	 - parent is root class.
  	   parent == X:Y	 - parent is a node in hierarchy.
  	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
  
  	   handle == 0:0	 - generate handle from kernel pool.
  	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
  	   handle == X:Y	 - clear.
  	   handle == X:0	 - root class.
  	 */
  
  	/* Step 1. Determine qdisc handle X:0 */
de179c8c1   Hong zhi guo   netlink: have len...
1442
1443
1444
  	portid = tcm->tcm_parent;
  	clid = tcm->tcm_handle;
  	qid = TC_H_MAJ(clid);
15e473046   Eric W. Biederman   netlink: Rename p...
1445
1446
  	if (portid != TC_H_ROOT) {
  		u32 qid1 = TC_H_MAJ(portid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1447
1448
1449
1450
1451
1452
1453
1454
  
  		if (qid && qid1) {
  			/* If both majors are known, they must be identical. */
  			if (qid != qid1)
  				return -EINVAL;
  		} else if (qid1) {
  			qid = qid1;
  		} else if (qid == 0)
af356afa0   Patrick McHardy   net_sched: reintr...
1455
  			qid = dev->qdisc->handle;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456
1457
  
  		/* Now qid is genuine qdisc handle consistent
cc7ec456f   Eric Dumazet   net_sched: cleanups
1458
1459
  		 * both with parent and child.
  		 *
15e473046   Eric W. Biederman   netlink: Rename p...
1460
  		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1461
  		 */
15e473046   Eric W. Biederman   netlink: Rename p...
1462
1463
  		if (portid)
  			portid = TC_H_MAKE(qid, portid);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1464
1465
  	} else {
  		if (qid == 0)
af356afa0   Patrick McHardy   net_sched: reintr...
1466
  			qid = dev->qdisc->handle;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1467
1468
1469
  	}
  
  	/* OK. Locate qdisc */
cc7ec456f   Eric Dumazet   net_sched: cleanups
1470
1471
  	q = qdisc_lookup(dev, qid);
  	if (!q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1472
1473
1474
1475
1476
1477
1478
1479
1480
  		return -ENOENT;
  
  	/* An check that it supports classes */
  	cops = q->ops->cl_ops;
  	if (cops == NULL)
  		return -EINVAL;
  
  	/* Now try to get class */
  	if (clid == 0) {
15e473046   Eric W. Biederman   netlink: Rename p...
1481
  		if (portid == TC_H_ROOT)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1482
1483
1484
1485
1486
1487
1488
1489
1490
  			clid = qid;
  	} else
  		clid = TC_H_MAKE(qid, clid);
  
  	if (clid)
  		cl = cops->get(q, clid);
  
  	if (cl == 0) {
  		err = -ENOENT;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1491
1492
  		if (n->nlmsg_type != RTM_NEWTCLASS ||
  		    !(n->nlmsg_flags & NLM_F_CREATE))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1493
1494
1495
  			goto out;
  	} else {
  		switch (n->nlmsg_type) {
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
1496
  		case RTM_NEWTCLASS:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1497
  			err = -EEXIST;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1498
  			if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1499
1500
1501
  				goto out;
  			break;
  		case RTM_DELTCLASS:
de6d5cdf8   Patrick McHardy   net_sched: make c...
1502
1503
1504
  			err = -EOPNOTSUPP;
  			if (cops->delete)
  				err = cops->delete(q, cl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1505
  			if (err == 0)
7316ae88c   Tom Goff   net_sched: make t...
1506
  				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1507
1508
  			goto out;
  		case RTM_GETTCLASS:
7316ae88c   Tom Goff   net_sched: make t...
1509
  			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1510
1511
1512
1513
1514
1515
1516
1517
  			goto out;
  		default:
  			err = -EINVAL;
  			goto out;
  		}
  	}
  
  	new_cl = cl;
de6d5cdf8   Patrick McHardy   net_sched: make c...
1518
1519
  	err = -EOPNOTSUPP;
  	if (cops->change)
15e473046   Eric W. Biederman   netlink: Rename p...
1520
  		err = cops->change(q, clid, portid, tca, &new_cl);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1521
  	if (err == 0)
7316ae88c   Tom Goff   net_sched: make t...
1522
  		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
  
  out:
  	if (cl)
  		cops->put(q, cl);
  
  	return err;
  }
  
  
  /*
   * Append one traffic-class message describing class @cl of qdisc @q
   * to @skb.
   *
   * The message consists of a struct tcmsg header, a TCA_KIND string
   * attribute carrying q->ops->id, whatever the class ops' ->dump() adds,
   * and a statistics block filled through the gnet_stats helpers and the
   * class ops' ->dump_stats().  ->dump and ->dump_stats are optional.
   *
   * Returns skb->len on success.  On any failure the skb is trimmed back
   * to where its tail was on entry and -1 is returned.
   */
  static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
  			  unsigned long cl,
  			  u32 portid, u32 seq, u16 flags, int event)
  {
  	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
  	unsigned char *msg_start = skb_tail_pointer(skb);
  	struct gnet_dump dump;
  	struct nlmsghdr *nlh;
  	struct tcmsg *tcm;
  
  	cond_resched();
  
  	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
  	if (!nlh)
  		goto nla_put_failure;
  
  	tcm = nlmsg_data(nlh);
  	tcm->tcm_family = AF_UNSPEC;
  	tcm->tcm__pad1 = 0;
  	tcm->tcm__pad2 = 0;
  	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
  	/* Defaults; ->dump() receives tcm and may overwrite them. */
  	tcm->tcm_parent = q->handle;
  	tcm->tcm_handle = q->handle;
  	tcm->tcm_info = 0;
  
  	if (nla_put_string(skb, TCA_KIND, q->ops->id))
  		goto nla_put_failure;
  
  	if (cops->dump && cops->dump(q, cl, skb, tcm) < 0)
  		goto nla_put_failure;
  
  	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
  					 TCA_XSTATS,
  					 qdisc_root_sleeping_lock(q),
  					 &dump) < 0)
  		goto nla_put_failure;
  
  	if (cops->dump_stats && cops->dump_stats(q, cl, &dump) < 0)
  		goto nla_put_failure;
  
  	if (gnet_stats_finish_copy(&dump) < 0)
  		goto nla_put_failure;
  
  	/* Final message length: everything appended since entry. */
  	nlh->nlmsg_len = skb_tail_pointer(skb) - msg_start;
  	return skb->len;
  
  nla_put_failure:
  	nlmsg_trim(skb, msg_start);
  	return -1;
  }
7316ae88c   Tom Goff   net_sched: make t...
1576
1577
1578
  static int tclass_notify(struct net *net, struct sk_buff *oskb,
  			 struct nlmsghdr *n, struct Qdisc *q,
  			 unsigned long cl, int event)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1579
1580
  {
  	struct sk_buff *skb;
15e473046   Eric W. Biederman   netlink: Rename p...
1581
  	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1582
1583
1584
1585
  
  	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  	if (!skb)
  		return -ENOBUFS;
15e473046   Eric W. Biederman   netlink: Rename p...
1586
  	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1587
1588
1589
  		kfree_skb(skb);
  		return -EINVAL;
  	}
15e473046   Eric W. Biederman   netlink: Rename p...
1590
  	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
cc7ec456f   Eric Dumazet   net_sched: cleanups
1591
  			      n->nlmsg_flags & NLM_F_ECHO);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1592
  }
cc7ec456f   Eric Dumazet   net_sched: cleanups
1593
1594
1595
1596
  /*
   * Walker state for class dumps.  A pointer to the embedded walker @w is
   * what gets handed to the class ops' ->walk(); qdisc_class_dump()
   * converts it back to this structure, so @w must stay the first member.
   */
  struct qdisc_dump_args {
  	struct qdisc_walker w;		/* generic walker; keep first */
  	struct sk_buff *skb;		/* dump output buffer */
  	struct netlink_callback *cb;	/* netlink dump context */
  };
  
  static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
  {
  	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
15e473046   Eric W. Biederman   netlink: Rename p...
1602
  	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1603
1604
  			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
  }
307236730   David S. Miller   pkt_sched: Manage...
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
  static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
  				struct tcmsg *tcm, struct netlink_callback *cb,
  				int *t_p, int s_t)
  {
  	struct qdisc_dump_args arg;
  
  	if (tc_qdisc_dump_ignore(q) ||
  	    *t_p < s_t || !q->ops->cl_ops ||
  	    (tcm->tcm_parent &&
  	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
  		(*t_p)++;
  		return 0;
  	}
  	if (*t_p > s_t)
  		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
  	arg.w.fn = qdisc_class_dump;
  	arg.skb = skb;
  	arg.cb = cb;
  	arg.w.stop  = 0;
  	arg.w.skip = cb->args[1];
  	arg.w.count = 0;
  	q->ops->cl_ops->walk(q, &arg.w);
  	cb->args[1] = arg.w.count;
  	if (arg.w.stop)
  		return -1;
  	(*t_p)++;
  	return 0;
  }
  
  /*
   * Dump the classes of @root and of every qdisc linked on root->list.
   * A NULL @root is treated as an empty hierarchy.
   *
   * Returns 0 when all qdiscs were processed and -1 as soon as
   * tc_dump_tclass_qdisc() fails for one of them.
   */
  static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
  			       struct tcmsg *tcm, struct netlink_callback *cb,
  			       int *t_p, int s_t)
  {
  	struct Qdisc *child;
  	int rc = 0;
  
  	if (!root)
  		return rc;
  
  	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
  		return -1;
  
  	list_for_each_entry(child, &root->list, list) {
  		if (tc_dump_tclass_qdisc(child, skb, tcm, cb, t_p, s_t) < 0) {
  			rc = -1;
  			break;
  		}
  	}
  
  	return rc;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1653
1654
  static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
  {
02ef22ca4   David S. Miller   pkt_sched: sch_ap...
1655
  	struct tcmsg *tcm = nlmsg_data(cb->nlh);
3b1e0a655   YOSHIFUJI Hideaki   [NET] NETNS: Omit...
1656
  	struct net *net = sock_net(skb->sk);
307236730   David S. Miller   pkt_sched: Manage...
1657
  	struct netdev_queue *dev_queue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1658
  	struct net_device *dev;
307236730   David S. Miller   pkt_sched: Manage...
1659
  	int t, s_t;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1660

573ce260b   Hong zhi guo   net-next: replace...
1661
  	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1662
  		return 0;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1663
1664
  	dev = dev_get_by_index(net, tcm->tcm_ifindex);
  	if (!dev)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1665
1666
1667
1668
  		return 0;
  
  	s_t = cb->args[0];
  	t = 0;
af356afa0   Patrick McHardy   net_sched: reintr...
1669
  	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
307236730   David S. Miller   pkt_sched: Manage...
1670
  		goto done;
24824a09e   Eric Dumazet   net: dynamic ingr...
1671
1672
1673
1674
  	dev_queue = dev_ingress_queue(dev);
  	if (dev_queue &&
  	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
  				&t, s_t) < 0)
307236730   David S. Miller   pkt_sched: Manage...
1675
  		goto done;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1676

307236730   David S. Miller   pkt_sched: Manage...
1677
  done:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1678
1679
1680
1681
1682
1683
1684
  	cb->args[0] = t;
  
  	dev_put(dev);
  	return skb->len;
  }
  
  /* Main classifier routine: scans classifier chain attached
cc7ec456f   Eric Dumazet   net_sched: cleanups
1685
1686
   * to this qdisc, (optionally) tests for protocol and asks
   * specific classifiers.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1687
   */
dc7f9f6e8   Eric Dumazet   net: sched: const...
1688
  int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
73ca4918f   Patrick McHardy   [NET_SCHED]: act_...
1689
1690
1691
  		       struct tcf_result *res)
  {
  	__be16 protocol = skb->protocol;
cc7ec456f   Eric Dumazet   net_sched: cleanups
1692
  	int err;
73ca4918f   Patrick McHardy   [NET_SCHED]: act_...
1693
1694
  
  	for (; tp; tp = tp->next) {
cc7ec456f   Eric Dumazet   net_sched: cleanups
1695
1696
1697
1698
1699
1700
  		if (tp->protocol != protocol &&
  		    tp->protocol != htons(ETH_P_ALL))
  			continue;
  		err = tp->classify(skb, tp, res);
  
  		if (err >= 0) {
73ca4918f   Patrick McHardy   [NET_SCHED]: act_...
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
  #ifdef CONFIG_NET_CLS_ACT
  			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
  				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
  #endif
  			return err;
  		}
  	}
  	return -1;
  }
  EXPORT_SYMBOL(tc_classify_compat);
dc7f9f6e8   Eric Dumazet   net: sched: const...
1711
  /*
   * Classify @skb against the chain starting at @tp.
   *
   * Without CONFIG_NET_CLS_ACT this is just tc_classify_compat().
   *
   * With CONFIG_NET_CLS_ACT a TC_ACT_RECLASSIFY result restarts
   * classification from the head of the chain.  The number of restarts is
   * tracked in the verdict field of skb->tc_verd; once it has reached
   * MAX_REC_LOOP a rate-limited notice naming the first filter of the
   * chain is logged and TC_ACT_SHOT is returned.
   *
   * Returns the classification result, or TC_ACT_SHOT on a reclassify
   * loop.
   */
  int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
  		struct tcf_result *res)
  {
  	int err;
  #ifdef CONFIG_NET_CLS_ACT
  	const struct tcf_proto *otp = tp;
  reclassify:
  #endif
  
  	err = tc_classify_compat(skb, tp, res);
  #ifdef CONFIG_NET_CLS_ACT
  	if (err == TC_ACT_RECLASSIFY) {
  		u32 verd = G_TC_VERD(skb->tc_verd);
  
  		/* Restart from the head of the chain. */
  		tp = otp;
  
  		if (verd++ >= MAX_REC_LOOP) {
  			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
  					       tp->q->ops->id,
  					       tp->prio & 0xffff,
  					       ntohs(tp->protocol));
  			return TC_ACT_SHOT;
  		}
  		/* Record the incremented restart count in the skb. */
  		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
  		goto reclassify;
  	}
  #endif
  	return err;
  }
  EXPORT_SYMBOL(tc_classify);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1741

a48b5a614   Patrick McHardy   [NET_SCHED]: Unli...
1742
1743
1744
1745
1746
1747
  void tcf_destroy(struct tcf_proto *tp)
  {
  	tp->ops->destroy(tp);
  	module_put(tp->ops->owner);
  	kfree(tp);
  }
ff31ab56c   Patrick McHardy   net-sched: change...
1748
  void tcf_destroy_chain(struct tcf_proto **fl)
a48b5a614   Patrick McHardy   [NET_SCHED]: Unli...
1749
1750
  {
  	struct tcf_proto *tp;
ff31ab56c   Patrick McHardy   net-sched: change...
1751
1752
  	while ((tp = *fl) != NULL) {
  		*fl = tp->next;
a48b5a614   Patrick McHardy   [NET_SCHED]: Unli...
1753
1754
1755
1756
  		tcf_destroy(tp);
  	}
  }
  EXPORT_SYMBOL(tcf_destroy_chain);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1757
1758
1759
  #ifdef CONFIG_PROC_FS
  static int psched_show(struct seq_file *seq, void *v)
  {
3c0cfc135   Patrick McHardy   [NET_SCHED]: Show...
1760
1761
1762
  	struct timespec ts;
  
  	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1763
1764
  	seq_printf(seq, "%08x %08x %08x %08x
  ",
ca44d6e60   Jarek Poplawski   pkt_sched: Rename...
1765
  		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca322   Patrick McHardy   [NET_SCHED]: Fix ...
1766
  		   1000000,
3c0cfc135   Patrick McHardy   [NET_SCHED]: Show...
1767
  		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1768
1769
1770
1771
1772
1773
  
  	return 0;
  }
  
  static int psched_open(struct inode *inode, struct file *file)
  {
7e5ab1578   Tom Goff   net_sched: minor ...
1774
  	return single_open(file, psched_show, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1775
  }
da7071d7e   Arjan van de Ven   [PATCH] mark stru...
1776
  static const struct file_operations psched_fops = {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1777
1778
1779
1780
1781
  	.owner = THIS_MODULE,
  	.open = psched_open,
  	.read  = seq_read,
  	.llseek = seq_lseek,
  	.release = single_release,
10297b993   YOSHIFUJI Hideaki   [NET] SCHED: Fix ...
1782
  };
7316ae88c   Tom Goff   net_sched: make t...
1783
1784
1785
1786
  
  /*
   * Per-namespace init: create the "psched" entry under the namespace's
   * proc_net directory.  Returns 0 on success, -ENOMEM if the entry could
   * not be created.
   */
  static int __net_init psched_net_init(struct net *net)
  {
  	if (!proc_create("psched", 0, net->proc_net, &psched_fops))
  		return -ENOMEM;
  
  	return 0;
  }
  
  /* Per-namespace exit: remove the "psched" entry from the namespace's proc_net. */
  static void __net_exit psched_net_exit(struct net *net)
  {
  	struct proc_dir_entry *parent = net->proc_net;
  
  	remove_proc_entry("psched", parent);
  }
  #else
  /* Without CONFIG_PROC_FS there is no proc entry to create; always succeeds. */
  static int __net_init psched_net_init(struct net *net)
  {
  	return 0;
  }
  
  /* Without CONFIG_PROC_FS there is nothing to remove. */
  static void __net_exit psched_net_exit(struct net *net)
  {
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1807
  #endif
7316ae88c   Tom Goff   net_sched: make t...
1808
1809
1810
1811
  /* Per-network-namespace hooks, registered from pktsched_init(). */
  static struct pernet_operations psched_net_ops = {
  	.exit	= psched_net_exit,
  	.init	= psched_net_init,
  };
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1812
1813
  /*
   * Subsystem init for the packet scheduler API.
   *
   * Registers the per-namespace operations, then the qdiscs referenced
   * here (pfifo_fast, pfifo, bfifo, pfifo_head_drop, mq), then the
   * rtnetlink handlers for the qdisc and traffic-class message types.
   *
   * Returns 0 on success or the error from register_pernet_subsys().
   * The results of register_qdisc() and rtnl_register() are not checked.
   */
  static int __init pktsched_init(void)
  {
  	int err;
  
  	err = register_pernet_subsys(&psched_net_ops);
  	if (err) {
  		pr_err("pktsched_init: cannot initialize per netns operations\n");
  		return err;
  	}
  
  	register_qdisc(&pfifo_fast_ops);
  	register_qdisc(&pfifo_qdisc_ops);
  	register_qdisc(&bfifo_qdisc_ops);
  	register_qdisc(&pfifo_head_drop_qdisc_ops);
  	register_qdisc(&mq_qdisc_ops);
  
  	/* RTM_DELQDISC shares its handler with RTM_GETQDISC. */
  	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
  	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
  	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
  	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
  	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
  	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
  
  	return 0;
  }
  
  subsys_initcall(pktsched_init);