Commit 2ac3ac8f86f2fe065d746d9a9abaca867adec577
Committed by
David S. Miller
1 parent
1f1059fcf0
Exists in
smarc-imx_3.14.28_1.0.0_ga
and in
1 other branch
ipv6: prevent fib6_run_gc() contention
On a high-traffic router with many processors and many IPv6 dst entries, a soft lockup in fib6_run_gc() can occur when the number of entries reaches gc_thresh.

This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector, but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time, so in the meantime other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another, which blocks them for quite a long time.

Resolve this by replacing the special value ~0UL of the expires parameter to fib6_run_gc() with an explicit "force" parameter that chooses between spin_lock_bh() and spin_trylock_bh(), and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 4 changed files with 13 additions and 16 deletions Side-by-side Diff
include/net/ip6_fib.h
net/ipv6/ip6_fib.c
... | ... | @@ -1632,19 +1632,16 @@ |
1632 | 1632 | |
1633 | 1633 | static DEFINE_SPINLOCK(fib6_gc_lock); |
1634 | 1634 | |
1635 | -void fib6_run_gc(unsigned long expires, struct net *net) | |
1635 | +void fib6_run_gc(unsigned long expires, struct net *net, bool force) | |
1636 | 1636 | { |
1637 | - if (expires != ~0UL) { | |
1637 | + if (force) { | |
1638 | 1638 | spin_lock_bh(&fib6_gc_lock); |
1639 | - gc_args.timeout = expires ? (int)expires : | |
1640 | - net->ipv6.sysctl.ip6_rt_gc_interval; | |
1641 | - } else { | |
1642 | - if (!spin_trylock_bh(&fib6_gc_lock)) { | |
1643 | - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); | |
1644 | - return; | |
1645 | - } | |
1646 | - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; | |
1639 | + } else if (!spin_trylock_bh(&fib6_gc_lock)) { | |
1640 | + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); | |
1641 | + return; | |
1647 | 1642 | } |
1643 | + gc_args.timeout = expires ? (int)expires : | |
1644 | + net->ipv6.sysctl.ip6_rt_gc_interval; | |
1648 | 1645 | |
1649 | 1646 | gc_args.more = icmp6_dst_gc(); |
1650 | 1647 | |
... | ... | @@ -1661,7 +1658,7 @@ |
1661 | 1658 | |
1662 | 1659 | static void fib6_gc_timer_cb(unsigned long arg) |
1663 | 1660 | { |
1664 | - fib6_run_gc(0, (struct net *)arg); | |
1661 | + fib6_run_gc(0, (struct net *)arg, true); | |
1665 | 1662 | } |
1666 | 1663 | |
1667 | 1664 | static int __net_init fib6_net_init(struct net *net) |
net/ipv6/ndisc.c
... | ... | @@ -1576,7 +1576,7 @@ |
1576 | 1576 | switch (event) { |
1577 | 1577 | case NETDEV_CHANGEADDR: |
1578 | 1578 | neigh_changeaddr(&nd_tbl, dev); |
1579 | - fib6_run_gc(~0UL, net); | |
1579 | + fib6_run_gc(0, net, false); | |
1580 | 1580 | idev = in6_dev_get(dev); |
1581 | 1581 | if (!idev) |
1582 | 1582 | break; |
... | ... | @@ -1586,7 +1586,7 @@ |
1586 | 1586 | break; |
1587 | 1587 | case NETDEV_DOWN: |
1588 | 1588 | neigh_ifdown(&nd_tbl, dev); |
1589 | - fib6_run_gc(~0UL, net); | |
1589 | + fib6_run_gc(0, net, false); | |
1590 | 1590 | break; |
1591 | 1591 | case NETDEV_NOTIFY_PEERS: |
1592 | 1592 | ndisc_send_unsol_na(dev); |
net/ipv6/route.c
... | ... | @@ -1326,7 +1326,7 @@ |
1326 | 1326 | goto out; |
1327 | 1327 | |
1328 | 1328 | net->ipv6.ip6_rt_gc_expire++; |
1329 | - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); | |
1329 | + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); | |
1330 | 1330 | net->ipv6.ip6_rt_last_gc = now; |
1331 | 1331 | entries = dst_entries_get_slow(ops); |
1332 | 1332 | if (entries < ops->gc_thresh) |
... | ... | @@ -2827,7 +2827,7 @@ |
2827 | 2827 | net = (struct net *)ctl->extra1; |
2828 | 2828 | delay = net->ipv6.sysctl.flush_delay; |
2829 | 2829 | proc_dointvec(ctl, write, buffer, lenp, ppos); |
2830 | - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); | |
2830 | + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); | |
2831 | 2831 | return 0; |
2832 | 2832 | } |
2833 | 2833 |