Blame view
net/sched/sch_tbf.c
10.4 KB
1da177e4c Linux-2.6.12-rc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 */
1da177e4c Linux-2.6.12-rc2 |
14 |
#include <linux/module.h> |
1da177e4c Linux-2.6.12-rc2 |
15 16 |
#include <linux/types.h> #include <linux/kernel.h> |
1da177e4c Linux-2.6.12-rc2 |
17 |
#include <linux/string.h> |
1da177e4c Linux-2.6.12-rc2 |
18 |
#include <linux/errno.h> |
1da177e4c Linux-2.6.12-rc2 |
19 |
#include <linux/skbuff.h> |
0ba480538 [NET_SCHED]: Remo... |
20 |
#include <net/netlink.h> |
1da177e4c Linux-2.6.12-rc2 |
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
#include <net/pkt_sched.h> /* Simple Token Bucket Filter. ======================================= SOURCE. ------- None. Description. ------------ A data flow obeys TBF with rate R and depth B, if for any time interval t_i...t_f the number of transmitted bits does not exceed B + R*(t_f-t_i). Packetized version of this definition: The sequence of packets of sizes s_i served at moments t_i obeys TBF, if for any i<=k: s_i+....+s_k <= B + R*(t_k - t_i) Algorithm. ---------- Let N(t_i) be B/R initially and N(t) grow continuously with time as: N(t+delta) = min{B/R, N(t) + delta} If the first packet in queue has length S, it may be transmitted only at the time t_* when S/R <= N(t_*), and in this case N(t) jumps: N(t_* + 0) = N(t_* - 0) - S/R. Actually, QoS requires two TBF to be applied to a data stream. One of them controls steady state burst size, another one with rate P (peak rate) and depth M (equal to link MTU) limits bursts at a smaller time scale. It is easy to see that P>R, and B>M. If P is infinity, this double TBF is equivalent to a single one. When TBF works in reshaping mode, latency is estimated as: lat = max ((L-B)/R, (L-M)/P) NOTES. ------ If TBF throttles, it starts a watchdog timer, which will wake it up when it is ready to transmit. Note that the minimal timer resolution is 1/HZ. If no new packets arrive during this period, or if the device is not awaken by EOI for some previous packet, TBF can stop its activity for 1/HZ. This means, that with depth B, the maximal rate is R_crit = B*HZ F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. Note that the peak rate TBF is much more tough: with MTU 1500 P_crit = 150Kbytes/sec. So, if you need greater peak rates, use alpha with HZ=1000 :-) With classful TBF, limit is just kept for backwards compatibility. It is passed to the default bfifo qdisc - if the inner qdisc is changed the limit is not effective anymore. */ |
cc7ec456f net_sched: cleanups |
98 |
struct tbf_sched_data { |
1da177e4c Linux-2.6.12-rc2 |
99 100 101 102 103 104 105 106 107 108 109 110 |
/* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ u32 mtu; u32 max_size; struct qdisc_rate_table *R_tab; struct qdisc_rate_table *P_tab; /* Variables */ long tokens; /* Current number of B tokens */ long ptokens; /* Current number of P tokens */ psched_time_t t_c; /* Time check-point */ |
1da177e4c Linux-2.6.12-rc2 |
111 |
struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ |
f7f593e38 [NET_SCHED]: sch_... |
112 |
struct qdisc_watchdog watchdog; /* Watchdog timer */ |
1da177e4c Linux-2.6.12-rc2 |
113 |
}; |
cc7ec456f net_sched: cleanups |
114 115 |
#define L2T(q, L) qdisc_l2t((q)->R_tab, L) #define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) |
1da177e4c Linux-2.6.12-rc2 |
116 |
|
cc7ec456f net_sched: cleanups |
117 |
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
1da177e4c Linux-2.6.12-rc2 |
118 119 120 |
{ struct tbf_sched_data *q = qdisc_priv(sch); int ret; |
69747650c pkt_sched: Fix re... |
121 122 |
if (qdisc_pkt_len(skb) > q->max_size) return qdisc_reshape_fail(skb, sch); |
1da177e4c Linux-2.6.12-rc2 |
123 |
|
5f86173bd net_sched: Add qd... |
124 |
ret = qdisc_enqueue(skb, q->qdisc); |
9871e50ed net: Use NET_XMIT... |
125 |
if (ret != NET_XMIT_SUCCESS) { |
378a2f090 net_sched: Add qd... |
126 127 |
if (net_xmit_drop_count(ret)) sch->qstats.drops++; |
1da177e4c Linux-2.6.12-rc2 |
128 129 130 131 |
return ret; } sch->q.qlen++; |
9871e50ed net: Use NET_XMIT... |
132 |
return NET_XMIT_SUCCESS; |
1da177e4c Linux-2.6.12-rc2 |
133 |
} |
cc7ec456f net_sched: cleanups |
134 |
static unsigned int tbf_drop(struct Qdisc *sch) |
1da177e4c Linux-2.6.12-rc2 |
135 136 |
{ struct tbf_sched_data *q = qdisc_priv(sch); |
6d037a26f [PKT_SCHED]: Qdis... |
137 |
unsigned int len = 0; |
1da177e4c Linux-2.6.12-rc2 |
138 |
|
6d037a26f [PKT_SCHED]: Qdis... |
139 |
if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { |
1da177e4c Linux-2.6.12-rc2 |
140 141 142 143 144 |
sch->q.qlen--; sch->qstats.drops++; } return len; } |
cc7ec456f net_sched: cleanups |
145 |
static struct sk_buff *tbf_dequeue(struct Qdisc *sch) |
1da177e4c Linux-2.6.12-rc2 |
146 147 148 |
{ struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; |
03c05f0d4 pkt_sched: Use qd... |
149 |
skb = q->qdisc->ops->peek(q->qdisc); |
1da177e4c Linux-2.6.12-rc2 |
150 151 152 |
if (skb) { psched_time_t now; |
f7f593e38 [NET_SCHED]: sch_... |
153 |
long toks; |
1da177e4c Linux-2.6.12-rc2 |
154 |
long ptoks = 0; |
0abf77e55 net_sched: Add ac... |
155 |
unsigned int len = qdisc_pkt_len(skb); |
1da177e4c Linux-2.6.12-rc2 |
156 |
|
3bebcda28 [NET_SCHED]: turn... |
157 |
now = psched_get_time(); |
03cc45c0a [NET_SCHED]: turn... |
158 |
toks = psched_tdiff_bounded(now, q->t_c, q->buffer); |
1da177e4c Linux-2.6.12-rc2 |
159 160 161 162 163 164 165 166 167 168 169 170 171 |
if (q->P_tab) { ptoks = toks + q->ptokens; if (ptoks > (long)q->mtu) ptoks = q->mtu; ptoks -= L2T_P(q, len); } toks += q->tokens; if (toks > (long)q->buffer) toks = q->buffer; toks -= L2T(q, len); if ((toks|ptoks) >= 0) { |
77be155cb pkt_sched: Add pe... |
172 |
skb = qdisc_dequeue_peeked(q->qdisc); |
03c05f0d4 pkt_sched: Use qd... |
173 174 |
if (unlikely(!skb)) return NULL; |
1da177e4c Linux-2.6.12-rc2 |
175 176 177 178 |
q->t_c = now; q->tokens = toks; q->ptokens = ptoks; sch->q.qlen--; |
fd245a4ad net_sched: move T... |
179 |
qdisc_unthrottled(sch); |
9190b3b32 net_sched: accura... |
180 |
qdisc_bstats_update(sch, skb); |
1da177e4c Linux-2.6.12-rc2 |
181 182 |
return skb; } |
f7f593e38 [NET_SCHED]: sch_... |
183 184 |
qdisc_watchdog_schedule(&q->watchdog, now + max_t(long, -toks, -ptoks)); |
1da177e4c Linux-2.6.12-rc2 |
185 186 187 188 189 190 191 192 193 194 195 |
/* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, but, however, this is wrong in principle. We MUST NOT reorder packets under these circumstances. Really, if we split the flow into independent subflows, it would be a very good solution. This is the main idea of all FQ algorithms (cf. CSZ, HPFQ, HFSC) */ |
1da177e4c Linux-2.6.12-rc2 |
196 197 198 199 |
sch->qstats.overlimits++; } return NULL; } |
cc7ec456f net_sched: cleanups |
200 |
static void tbf_reset(struct Qdisc *sch) |
1da177e4c Linux-2.6.12-rc2 |
201 202 203 204 205 |
{ struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); sch->q.qlen = 0; |
3bebcda28 [NET_SCHED]: turn... |
206 |
q->t_c = psched_get_time(); |
1da177e4c Linux-2.6.12-rc2 |
207 208 |
q->tokens = q->buffer; q->ptokens = q->mtu; |
f7f593e38 [NET_SCHED]: sch_... |
209 |
qdisc_watchdog_cancel(&q->watchdog); |
1da177e4c Linux-2.6.12-rc2 |
210 |
} |
27a3421e4 [NET_SCHED]: Use ... |
211 212 213 214 215 |
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, }; |
cc7ec456f net_sched: cleanups |
216 |
static int tbf_change(struct Qdisc *sch, struct nlattr *opt) |
1da177e4c Linux-2.6.12-rc2 |
217 |
{ |
cee63723b [NET_SCHED]: Prop... |
218 |
int err; |
1da177e4c Linux-2.6.12-rc2 |
219 |
struct tbf_sched_data *q = qdisc_priv(sch); |
1e90474c3 [NET_SCHED]: Conv... |
220 |
struct nlattr *tb[TCA_TBF_PTAB + 1]; |
1da177e4c Linux-2.6.12-rc2 |
221 222 223 224 |
struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; |
cc7ec456f net_sched: cleanups |
225 |
int max_size, n; |
1da177e4c Linux-2.6.12-rc2 |
226 |
|
27a3421e4 [NET_SCHED]: Use ... |
227 |
err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); |
cee63723b [NET_SCHED]: Prop... |
228 229 230 231 |
if (err < 0) return err; err = -EINVAL; |
27a3421e4 [NET_SCHED]: Use ... |
232 |
if (tb[TCA_TBF_PARMS] == NULL) |
1da177e4c Linux-2.6.12-rc2 |
233 |
goto done; |
1e90474c3 [NET_SCHED]: Conv... |
234 235 |
qopt = nla_data(tb[TCA_TBF_PARMS]); rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); |
1da177e4c Linux-2.6.12-rc2 |
236 237 238 239 240 |
if (rtab == NULL) goto done; if (qopt->peakrate.rate) { if (qopt->peakrate.rate > qopt->rate.rate) |
1e90474c3 [NET_SCHED]: Conv... |
241 |
ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); |
1da177e4c Linux-2.6.12-rc2 |
242 243 244 245 246 |
if (ptab == NULL) goto done; } for (n = 0; n < 256; n++) |
cc7ec456f net_sched: cleanups |
247 248 249 |
if (rtab->data[n] > qopt->buffer) break; max_size = (n << qopt->rate.cell_log) - 1; |
1da177e4c Linux-2.6.12-rc2 |
250 251 252 253 |
if (ptab) { int size; for (n = 0; n < 256; n++) |
cc7ec456f net_sched: cleanups |
254 255 256 257 258 |
if (ptab->data[n] > qopt->mtu) break; size = (n << qopt->peakrate.cell_log) - 1; if (size < max_size) max_size = size; |
1da177e4c Linux-2.6.12-rc2 |
259 260 261 |
} if (max_size < 0) goto done; |
f0cd15081 tbf: stop wanton ... |
262 263 264 265 266 |
if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) goto done; } else if (qopt->limit > 0) { |
fb0305ce1 net-sched: consol... |
267 268 269 |
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); if (IS_ERR(child)) { err = PTR_ERR(child); |
1da177e4c Linux-2.6.12-rc2 |
270 |
goto done; |
fb0305ce1 net-sched: consol... |
271 |
} |
1da177e4c Linux-2.6.12-rc2 |
272 273 274 |
} sch_tree_lock(sch); |
5e50da01d [NET_SCHED]: Fix ... |
275 276 |
if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); |
b94c8afcb pkt_sched: remove... |
277 278 |
qdisc_destroy(q->qdisc); q->qdisc = child; |
5e50da01d [NET_SCHED]: Fix ... |
279 |
} |
1da177e4c Linux-2.6.12-rc2 |
280 281 282 283 284 285 |
q->limit = qopt->limit; q->mtu = qopt->mtu; q->max_size = max_size; q->buffer = qopt->buffer; q->tokens = q->buffer; q->ptokens = q->mtu; |
b94c8afcb pkt_sched: remove... |
286 |
|
a0bffffc1 net/*: use linux/... |
287 288 |
swap(q->R_tab, rtab); swap(q->P_tab, ptab); |
b94c8afcb pkt_sched: remove... |
289 |
|
1da177e4c Linux-2.6.12-rc2 |
290 291 292 293 294 295 296 297 298 |
sch_tree_unlock(sch); err = 0; done: if (rtab) qdisc_put_rtab(rtab); if (ptab) qdisc_put_rtab(ptab); return err; } |
cc7ec456f net_sched: cleanups |
299 |
static int tbf_init(struct Qdisc *sch, struct nlattr *opt) |
1da177e4c Linux-2.6.12-rc2 |
300 301 302 303 304 |
{ struct tbf_sched_data *q = qdisc_priv(sch); if (opt == NULL) return -EINVAL; |
3bebcda28 [NET_SCHED]: turn... |
305 |
q->t_c = psched_get_time(); |
f7f593e38 [NET_SCHED]: sch_... |
306 |
qdisc_watchdog_init(&q->watchdog, sch); |
1da177e4c Linux-2.6.12-rc2 |
307 308 309 310 311 312 313 314 |
q->qdisc = &noop_qdisc; return tbf_change(sch, opt); } static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); |
f7f593e38 [NET_SCHED]: sch_... |
315 |
qdisc_watchdog_cancel(&q->watchdog); |
1da177e4c Linux-2.6.12-rc2 |
316 317 318 319 320 321 322 323 324 325 326 327 |
if (q->P_tab) qdisc_put_rtab(q->P_tab); if (q->R_tab) qdisc_put_rtab(q->R_tab); qdisc_destroy(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); |
4b3550ef5 [NET_SCHED]: Use ... |
328 |
struct nlattr *nest; |
1da177e4c Linux-2.6.12-rc2 |
329 |
struct tc_tbf_qopt opt; |
b0460e448 sch_tbf: report b... |
330 |
sch->qstats.backlog = q->qdisc->qstats.backlog; |
4b3550ef5 [NET_SCHED]: Use ... |
331 332 333 |
nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; |
1da177e4c Linux-2.6.12-rc2 |
334 335 336 337 338 339 340 341 342 |
opt.limit = q->limit; opt.rate = q->R_tab->rate; if (q->P_tab) opt.peakrate = q->P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); opt.mtu = q->mtu; opt.buffer = q->buffer; |
1e90474c3 [NET_SCHED]: Conv... |
343 |
NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); |
1da177e4c Linux-2.6.12-rc2 |
344 |
|
4b3550ef5 [NET_SCHED]: Use ... |
345 |
nla_nest_end(skb, nest); |
1da177e4c Linux-2.6.12-rc2 |
346 |
return skb->len; |
1e90474c3 [NET_SCHED]: Conv... |
347 |
nla_put_failure: |
4b3550ef5 [NET_SCHED]: Use ... |
348 |
nla_nest_cancel(skb, nest); |
1da177e4c Linux-2.6.12-rc2 |
349 350 351 352 353 354 355 |
return -1; } static int tbf_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct tbf_sched_data *q = qdisc_priv(sch); |
1da177e4c Linux-2.6.12-rc2 |
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old) { struct tbf_sched_data *q = qdisc_priv(sch); if (new == NULL) new = &noop_qdisc; sch_tree_lock(sch); |
b94c8afcb pkt_sched: remove... |
371 372 |
*old = q->qdisc; q->qdisc = new; |
5e50da01d [NET_SCHED]: Fix ... |
373 |
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); |
1da177e4c Linux-2.6.12-rc2 |
374 |
qdisc_reset(*old); |
1da177e4c Linux-2.6.12-rc2 |
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 |
sch_tree_unlock(sch); return 0; } static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) { struct tbf_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long tbf_get(struct Qdisc *sch, u32 classid) { return 1; } static void tbf_put(struct Qdisc *sch, unsigned long arg) { } |
1da177e4c Linux-2.6.12-rc2 |
394 395 396 397 398 399 400 401 402 403 404 |
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { if (walker->count >= walker->skip) if (walker->fn(sch, 1, walker) < 0) { walker->stop = 1; return; } walker->count++; } } |
cc7ec456f net_sched: cleanups |
405 |
static const struct Qdisc_class_ops tbf_class_ops = { |
1da177e4c Linux-2.6.12-rc2 |
406 407 408 409 |
.graft = tbf_graft, .leaf = tbf_leaf, .get = tbf_get, .put = tbf_put, |
1da177e4c Linux-2.6.12-rc2 |
410 |
.walk = tbf_walk, |
1da177e4c Linux-2.6.12-rc2 |
411 412 |
.dump = tbf_dump_class, }; |
20fea08b5 [NET]: Move Qdisc... |
413 |
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { |
1da177e4c Linux-2.6.12-rc2 |
414 415 416 417 418 419 |
.next = NULL, .cl_ops = &tbf_class_ops, .id = "tbf", .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, |
77be155cb pkt_sched: Add pe... |
420 |
.peek = qdisc_peek_dequeued, |
1da177e4c Linux-2.6.12-rc2 |
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 |
.drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, .destroy = tbf_destroy, .change = tbf_change, .dump = tbf_dump, .owner = THIS_MODULE, }; static int __init tbf_module_init(void) { return register_qdisc(&tbf_qdisc_ops); } static void __exit tbf_module_exit(void) { unregister_qdisc(&tbf_qdisc_ops); } module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); |