Commit a2da570d62fcb9e8816f6920e1ec02c706b289fa

Authored by Eric Dumazet
Committed by David S. Miller
1 parent fd245a4adb

net_sched: RCU conversion of stab

This patch converts stab qdisc management to RCU, so that we can perform
the qdisc_calculate_pkt_len() call before getting qdisc lock.

This shortens the time the qdisc lock is held in __dev_xmit_skb().

This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding a lot
of cache misses and so reducing latencies.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Jesper Dangaard Brouer <hawk@diku.dk>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Jamal Hadi Salim <hadi@cyberus.ca>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 38 additions and 19 deletions Side-by-side Diff

include/net/sch_generic.h
... ... @@ -36,6 +36,7 @@
36 36 };
37 37  
38 38 struct qdisc_size_table {
  39 + struct rcu_head rcu;
39 40 struct list_head list;
40 41 struct tc_sizespec szopts;
41 42 int refcnt;
... ... @@ -53,7 +54,7 @@
53 54 #define TCQ_F_WARN_NONWC (1 << 16)
54 55 int padded;
55 56 struct Qdisc_ops *ops;
56   - struct qdisc_size_table *stab;
  57 + struct qdisc_size_table __rcu *stab;
57 58 struct list_head list;
58 59 u32 handle;
59 60 u32 parent;
... ... @@ -349,8 +350,8 @@
349 350 struct Qdisc_ops *ops);
350 351 extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
351 352 struct Qdisc_ops *ops, u32 parentid);
352   -extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
353   - struct qdisc_size_table *stab);
  353 +extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
  354 + const struct qdisc_size_table *stab);
354 355 extern void tcf_destroy(struct tcf_proto *tp);
355 356 extern void tcf_destroy_chain(struct tcf_proto **fl);
356 357  
357 358  
358 359  
... ... @@ -429,12 +430,20 @@
429 430 #define net_xmit_drop_count(e) (1)
430 431 #endif
431 432  
432   -static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  433 +static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
  434 + const struct Qdisc *sch)
433 435 {
434 436 #ifdef CONFIG_NET_SCHED
435   - if (sch->stab)
436   - qdisc_calculate_pkt_len(skb, sch->stab);
  437 + struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
  438 +
  439 + if (stab)
  440 + __qdisc_calculate_pkt_len(skb, stab);
437 441 #endif
  442 +}
  443 +
  444 +static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  445 +{
  446 + qdisc_calculate_pkt_len(skb, sch);
438 447 return sch->enqueue(skb, sch);
439 448 }
440 449  
net/core/dev.c
... ... @@ -2325,15 +2325,18 @@
2325 2325 struct netdev_queue *txq)
2326 2326 {
2327 2327 spinlock_t *root_lock = qdisc_lock(q);
2328   - bool contended = qdisc_is_running(q);
  2328 + bool contended;
2329 2329 int rc;
2330 2330  
  2331 + qdisc_skb_cb(skb)->pkt_len = skb->len;
  2332 + qdisc_calculate_pkt_len(skb, q);
2331 2333 /*
2332 2334 * Heuristic to force contended enqueues to serialize on a
2333 2335 * separate lock before trying to get qdisc main lock.
2334 2336 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2335 2337 * and dequeue packets faster.
2336 2338 */
  2339 + contended = qdisc_is_running(q);
2337 2340 if (unlikely(contended))
2338 2341 spin_lock(&q->busylock);
2339 2342  
... ... @@ -2351,7 +2354,6 @@
2351 2354 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2352 2355 skb_dst_force(skb);
2353 2356  
2354   - qdisc_skb_cb(skb)->pkt_len = skb->len;
2355 2357 qdisc_bstats_update(q, skb);
2356 2358  
2357 2359 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
... ... @@ -2366,7 +2368,7 @@
2366 2368 rc = NET_XMIT_SUCCESS;
2367 2369 } else {
2368 2370 skb_dst_force(skb);
2369   - rc = qdisc_enqueue_root(skb, q);
  2371 + rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2370 2372 if (qdisc_run_begin(q)) {
2371 2373 if (unlikely(contended)) {
2372 2374 spin_unlock(&q->busylock);
net/sched/sch_api.c
... ... @@ -398,6 +398,11 @@
398 398 return stab;
399 399 }
400 400  
  401 +static void stab_kfree_rcu(struct rcu_head *head)
  402 +{
  403 + kfree(container_of(head, struct qdisc_size_table, rcu));
  404 +}
  405 +
401 406 void qdisc_put_stab(struct qdisc_size_table *tab)
402 407 {
403 408 if (!tab)
... ... @@ -407,7 +412,7 @@
407 412  
408 413 if (--tab->refcnt == 0) {
409 414 list_del(&tab->list);
410   - kfree(tab);
  415 + call_rcu_bh(&tab->rcu, stab_kfree_rcu);
411 416 }
412 417  
413 418 spin_unlock(&qdisc_stab_lock);
... ... @@ -430,7 +435,7 @@
430 435 return -1;
431 436 }
432 437  
433   -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
  438 +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
434 439 {
435 440 int pkt_len, slot;
436 441  
... ... @@ -456,7 +461,7 @@
456 461 pkt_len = 1;
457 462 qdisc_skb_cb(skb)->pkt_len = pkt_len;
458 463 }
459   -EXPORT_SYMBOL(qdisc_calculate_pkt_len);
  464 +EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
460 465  
461 466 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
462 467 {
... ... @@ -835,7 +840,7 @@
835 840 err = PTR_ERR(stab);
836 841 goto err_out4;
837 842 }
838   - sch->stab = stab;
  843 + rcu_assign_pointer(sch->stab, stab);
839 844 }
840 845 if (tca[TCA_RATE]) {
841 846 spinlock_t *root_lock;
... ... @@ -875,7 +880,7 @@
875 880 * Any broken qdiscs that would require a ops->reset() here?
876 881 * The qdisc was never in action so it shouldn't be necessary.
877 882 */
878   - qdisc_put_stab(sch->stab);
  883 + qdisc_put_stab(rtnl_dereference(sch->stab));
879 884 if (ops->destroy)
880 885 ops->destroy(sch);
881 886 goto err_out3;
... ... @@ -883,7 +888,7 @@
883 888  
884 889 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
885 890 {
886   - struct qdisc_size_table *stab = NULL;
  891 + struct qdisc_size_table *ostab, *stab = NULL;
887 892 int err = 0;
888 893  
889 894 if (tca[TCA_OPTIONS]) {
... ... @@ -900,8 +905,9 @@
900 905 return PTR_ERR(stab);
901 906 }
902 907  
903   - qdisc_put_stab(sch->stab);
904   - sch->stab = stab;
  908 + ostab = rtnl_dereference(sch->stab);
  909 + rcu_assign_pointer(sch->stab, stab);
  910 + qdisc_put_stab(ostab);
905 911  
906 912 if (tca[TCA_RATE]) {
907 913 /* NB: ignores errors from replace_estimator
... ... @@ -1180,6 +1186,7 @@
1180 1186 struct nlmsghdr *nlh;
1181 1187 unsigned char *b = skb_tail_pointer(skb);
1182 1188 struct gnet_dump d;
  1189 + struct qdisc_size_table *stab;
1183 1190  
1184 1191 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1185 1192 tcm = NLMSG_DATA(nlh);
... ... @@ -1195,7 +1202,8 @@
1195 1202 goto nla_put_failure;
1196 1203 q->qstats.qlen = q->q.qlen;
1197 1204  
1198   - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
  1205 + stab = rtnl_dereference(q->stab);
  1206 + if (stab && qdisc_dump_stab(skb, stab) < 0)
1199 1207 goto nla_put_failure;
1200 1208  
1201 1209 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
net/sched/sch_generic.c
... ... @@ -632,7 +632,7 @@
632 632 #ifdef CONFIG_NET_SCHED
633 633 qdisc_list_del(qdisc);
634 634  
635   - qdisc_put_stab(qdisc->stab);
  635 + qdisc_put_stab(rtnl_dereference(qdisc->stab));
636 636 #endif
637 637 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
638 638 if (ops->reset)