Commit a2da570d62fcb9e8816f6920e1ec02c706b289fa
Committed by
David S. Miller
1 parent
fd245a4adb
Exists in
master
and in
7 other branches
net_sched: RCU conversion of stab
This patch converts stab qdisc management to RCU, so that we can perform the qdisc_calculate_pkt_len() call before getting the qdisc lock. This shortens the time the lock is held in __dev_xmit_skb(). This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding a lot of cache misses and so reducing latencies. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Patrick McHardy <kaber@trash.net> CC: Jesper Dangaard Brouer <hawk@diku.dk> CC: Jarek Poplawski <jarkao2@gmail.com> CC: Jamal Hadi Salim <hadi@cyberus.ca> CC: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 38 additions and 19 deletions (side-by-side diff).
include/net/sch_generic.h
... | ... | @@ -36,6 +36,7 @@ |
36 | 36 | }; |
37 | 37 | |
38 | 38 | struct qdisc_size_table { |
39 | + struct rcu_head rcu; | |
39 | 40 | struct list_head list; |
40 | 41 | struct tc_sizespec szopts; |
41 | 42 | int refcnt; |
... | ... | @@ -53,7 +54,7 @@ |
53 | 54 | #define TCQ_F_WARN_NONWC (1 << 16) |
54 | 55 | int padded; |
55 | 56 | struct Qdisc_ops *ops; |
56 | - struct qdisc_size_table *stab; | |
57 | + struct qdisc_size_table __rcu *stab; | |
57 | 58 | struct list_head list; |
58 | 59 | u32 handle; |
59 | 60 | u32 parent; |
... | ... | @@ -349,8 +350,8 @@ |
349 | 350 | struct Qdisc_ops *ops); |
350 | 351 | extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, |
351 | 352 | struct Qdisc_ops *ops, u32 parentid); |
352 | -extern void qdisc_calculate_pkt_len(struct sk_buff *skb, | |
353 | - struct qdisc_size_table *stab); | |
353 | +extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, | |
354 | + const struct qdisc_size_table *stab); | |
354 | 355 | extern void tcf_destroy(struct tcf_proto *tp); |
355 | 356 | extern void tcf_destroy_chain(struct tcf_proto **fl); |
356 | 357 | |
357 | 358 | |
358 | 359 | |
... | ... | @@ -429,12 +430,20 @@ |
429 | 430 | #define net_xmit_drop_count(e) (1) |
430 | 431 | #endif |
431 | 432 | |
432 | -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |
433 | +static inline void qdisc_calculate_pkt_len(struct sk_buff *skb, | |
434 | + const struct Qdisc *sch) | |
433 | 435 | { |
434 | 436 | #ifdef CONFIG_NET_SCHED |
435 | - if (sch->stab) | |
436 | - qdisc_calculate_pkt_len(skb, sch->stab); | |
437 | + struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab); | |
438 | + | |
439 | + if (stab) | |
440 | + __qdisc_calculate_pkt_len(skb, stab); | |
437 | 441 | #endif |
442 | +} | |
443 | + | |
444 | +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |
445 | +{ | |
446 | + qdisc_calculate_pkt_len(skb, sch); | |
438 | 447 | return sch->enqueue(skb, sch); |
439 | 448 | } |
440 | 449 |
net/core/dev.c
... | ... | @@ -2325,15 +2325,18 @@ |
2325 | 2325 | struct netdev_queue *txq) |
2326 | 2326 | { |
2327 | 2327 | spinlock_t *root_lock = qdisc_lock(q); |
2328 | - bool contended = qdisc_is_running(q); | |
2328 | + bool contended; | |
2329 | 2329 | int rc; |
2330 | 2330 | |
2331 | + qdisc_skb_cb(skb)->pkt_len = skb->len; | |
2332 | + qdisc_calculate_pkt_len(skb, q); | |
2331 | 2333 | /* |
2332 | 2334 | * Heuristic to force contended enqueues to serialize on a |
2333 | 2335 | * separate lock before trying to get qdisc main lock. |
2334 | 2336 | * This permits __QDISC_STATE_RUNNING owner to get the lock more often |
2335 | 2337 | * and dequeue packets faster. |
2336 | 2338 | */ |
2339 | + contended = qdisc_is_running(q); | |
2337 | 2340 | if (unlikely(contended)) |
2338 | 2341 | spin_lock(&q->busylock); |
2339 | 2342 | |
... | ... | @@ -2351,7 +2354,6 @@ |
2351 | 2354 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) |
2352 | 2355 | skb_dst_force(skb); |
2353 | 2356 | |
2354 | - qdisc_skb_cb(skb)->pkt_len = skb->len; | |
2355 | 2357 | qdisc_bstats_update(q, skb); |
2356 | 2358 | |
2357 | 2359 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { |
... | ... | @@ -2366,7 +2368,7 @@ |
2366 | 2368 | rc = NET_XMIT_SUCCESS; |
2367 | 2369 | } else { |
2368 | 2370 | skb_dst_force(skb); |
2369 | - rc = qdisc_enqueue_root(skb, q); | |
2371 | + rc = q->enqueue(skb, q) & NET_XMIT_MASK; | |
2370 | 2372 | if (qdisc_run_begin(q)) { |
2371 | 2373 | if (unlikely(contended)) { |
2372 | 2374 | spin_unlock(&q->busylock); |
net/sched/sch_api.c
... | ... | @@ -398,6 +398,11 @@ |
398 | 398 | return stab; |
399 | 399 | } |
400 | 400 | |
401 | +static void stab_kfree_rcu(struct rcu_head *head) | |
402 | +{ | |
403 | + kfree(container_of(head, struct qdisc_size_table, rcu)); | |
404 | +} | |
405 | + | |
401 | 406 | void qdisc_put_stab(struct qdisc_size_table *tab) |
402 | 407 | { |
403 | 408 | if (!tab) |
... | ... | @@ -407,7 +412,7 @@ |
407 | 412 | |
408 | 413 | if (--tab->refcnt == 0) { |
409 | 414 | list_del(&tab->list); |
410 | - kfree(tab); | |
415 | + call_rcu_bh(&tab->rcu, stab_kfree_rcu); | |
411 | 416 | } |
412 | 417 | |
413 | 418 | spin_unlock(&qdisc_stab_lock); |
... | ... | @@ -430,7 +435,7 @@ |
430 | 435 | return -1; |
431 | 436 | } |
432 | 437 | |
433 | -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) | |
438 | +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) | |
434 | 439 | { |
435 | 440 | int pkt_len, slot; |
436 | 441 | |
... | ... | @@ -456,7 +461,7 @@ |
456 | 461 | pkt_len = 1; |
457 | 462 | qdisc_skb_cb(skb)->pkt_len = pkt_len; |
458 | 463 | } |
459 | -EXPORT_SYMBOL(qdisc_calculate_pkt_len); | |
464 | +EXPORT_SYMBOL(__qdisc_calculate_pkt_len); | |
460 | 465 | |
461 | 466 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) |
462 | 467 | { |
... | ... | @@ -835,7 +840,7 @@ |
835 | 840 | err = PTR_ERR(stab); |
836 | 841 | goto err_out4; |
837 | 842 | } |
838 | - sch->stab = stab; | |
843 | + rcu_assign_pointer(sch->stab, stab); | |
839 | 844 | } |
840 | 845 | if (tca[TCA_RATE]) { |
841 | 846 | spinlock_t *root_lock; |
... | ... | @@ -875,7 +880,7 @@ |
875 | 880 | * Any broken qdiscs that would require a ops->reset() here? |
876 | 881 | * The qdisc was never in action so it shouldn't be necessary. |
877 | 882 | */ |
878 | - qdisc_put_stab(sch->stab); | |
883 | + qdisc_put_stab(rtnl_dereference(sch->stab)); | |
879 | 884 | if (ops->destroy) |
880 | 885 | ops->destroy(sch); |
881 | 886 | goto err_out3; |
... | ... | @@ -883,7 +888,7 @@ |
883 | 888 | |
884 | 889 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) |
885 | 890 | { |
886 | - struct qdisc_size_table *stab = NULL; | |
891 | + struct qdisc_size_table *ostab, *stab = NULL; | |
887 | 892 | int err = 0; |
888 | 893 | |
889 | 894 | if (tca[TCA_OPTIONS]) { |
... | ... | @@ -900,8 +905,9 @@ |
900 | 905 | return PTR_ERR(stab); |
901 | 906 | } |
902 | 907 | |
903 | - qdisc_put_stab(sch->stab); | |
904 | - sch->stab = stab; | |
908 | + ostab = rtnl_dereference(sch->stab); | |
909 | + rcu_assign_pointer(sch->stab, stab); | |
910 | + qdisc_put_stab(ostab); | |
905 | 911 | |
906 | 912 | if (tca[TCA_RATE]) { |
907 | 913 | /* NB: ignores errors from replace_estimator |
... | ... | @@ -1180,6 +1186,7 @@ |
1180 | 1186 | struct nlmsghdr *nlh; |
1181 | 1187 | unsigned char *b = skb_tail_pointer(skb); |
1182 | 1188 | struct gnet_dump d; |
1189 | + struct qdisc_size_table *stab; | |
1183 | 1190 | |
1184 | 1191 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); |
1185 | 1192 | tcm = NLMSG_DATA(nlh); |
... | ... | @@ -1195,7 +1202,8 @@ |
1195 | 1202 | goto nla_put_failure; |
1196 | 1203 | q->qstats.qlen = q->q.qlen; |
1197 | 1204 | |
1198 | - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) | |
1205 | + stab = rtnl_dereference(q->stab); | |
1206 | + if (stab && qdisc_dump_stab(skb, stab) < 0) | |
1199 | 1207 | goto nla_put_failure; |
1200 | 1208 | |
1201 | 1209 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, |
net/sched/sch_generic.c