Commit 32b293a53deeb220769f9a29357cb151cfb8ee26
Committed by
David S. Miller
1 parent
c9da99e647
Exists in
master
and in
6 other branches
IPv6: Avoid taking write lock for /proc/net/ipv6_route
During some debugging I needed to look into how /proc/net/ipv6_route operated, and in my digging I found it's calling fib6_clean_all(), which uses "write_lock_bh(&table->tb6_lock)" before doing the walk of the table. I found this on 2.6.32, but reading the code I believe the same basic idea exists currently. Looking at the rtnetlink code, they are only calling "read_lock_bh(&table->tb6_lock);" via fib6_dump_table(). While I realize reading from proc isn't the recommended way of fetching the ipv6 route table, taking a write lock seems unnecessary and would probably cause network performance issues.

To verify this I loaded up the ipv6 route table and then ran iperf in 3 cases:
* doing nothing
* reading ipv6 route table via proc (while :; do cat /proc/net/ipv6_route > /dev/null; done)
* reading ipv6 route table via rtnetlink (while :; do ip -6 route show table all > /dev/null; done)

* Load the ipv6 route table up with:
  * for ((i = 0;i < 4000;i++)); do ip route add unreachable 2000::$i; done
* iperf commands:
  * client: iperf -i 1 -V -c <ipv6 addr>
  * server: iperf -V -s
* iperf results - 3 runs each (in Mbits/sec)
  * nothing: client: 927,927,927 server: 927,927,927
  * proc: client: 179,97,96,113 server: 142,112,133
  * iproute: client: 928,927,928 server: 927,927,927

lock_stat shows taking the write lock is causing the slowdown. Using this info I decided to write a version of fib6_clean_all() which replaces write_lock_bh(&table->tb6_lock) with read_lock_bh(&table->tb6_lock). With this new function I see the same results as with my rtnetlink iperf test.

Signed-off-by: Josh Hunt <joshhunt00@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 3 changed files with 25 additions and 1 deletion Side-by-side Diff
include/net/ip6_fib.h
... | ... | @@ -199,6 +199,10 @@ |
199 | 199 | const struct in6_addr *daddr, int dst_len, |
200 | 200 | const struct in6_addr *saddr, int src_len); |
201 | 201 | |
202 | +extern void fib6_clean_all_ro(struct net *net, | |
203 | + int (*func)(struct rt6_info *, void *arg), | |
204 | + int prune, void *arg); | |
205 | + | |
202 | 206 | extern void fib6_clean_all(struct net *net, |
203 | 207 | int (*func)(struct rt6_info *, void *arg), |
204 | 208 | int prune, void *arg); |
net/ipv6/ip6_fib.c
... | ... | @@ -1462,6 +1462,26 @@ |
1462 | 1462 | fib6_walk(&c.w); |
1463 | 1463 | } |
1464 | 1464 | |
1465 | +void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), | |
1466 | + int prune, void *arg) | |
1467 | +{ | |
1468 | + struct fib6_table *table; | |
1469 | + struct hlist_node *node; | |
1470 | + struct hlist_head *head; | |
1471 | + unsigned int h; | |
1472 | + | |
1473 | + rcu_read_lock(); | |
1474 | + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { | |
1475 | + head = &net->ipv6.fib_table_hash[h]; | |
1476 | + hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { | |
1477 | + read_lock_bh(&table->tb6_lock); | |
1478 | + fib6_clean_tree(net, &table->tb6_root, | |
1479 | + func, prune, arg); | |
1480 | + read_unlock_bh(&table->tb6_lock); | |
1481 | + } | |
1482 | + } | |
1483 | + rcu_read_unlock(); | |
1484 | +} | |
1465 | 1485 | void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), |
1466 | 1486 | int prune, void *arg) |
1467 | 1487 | { |
net/ipv6/route.c
... | ... | @@ -2680,7 +2680,7 @@ |
2680 | 2680 | static int ipv6_route_show(struct seq_file *m, void *v) |
2681 | 2681 | { |
2682 | 2682 | struct net *net = (struct net *)m->private; |
2683 | - fib6_clean_all(net, rt6_info_route, 0, m); | |
2683 | + fib6_clean_all_ro(net, rt6_info_route, 0, m); | |
2684 | 2684 | return 0; |
2685 | 2685 | } |
2686 | 2686 |