Commit 2ac3ac8f86f2fe065d746d9a9abaca867adec577

Authored by Michal Kubeček
Committed by David S. Miller
1 parent 1f1059fcf0

ipv6: prevent fib6_run_gc() contention

On a high-traffic router with many processors and many IPv6 dst
entries, a soft lockup in fib6_run_gc() can occur when the number of
entries reaches gc_thresh.

This happens because fib6_run_gc() uses fib6_gc_lock to allow
only one thread to run the garbage collector, but ip6_dst_gc()
doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc()
returns. On a system with many entries, this can take some time,
so in the meantime other threads pass the tests in
ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for
the lock. They then have to run the garbage collector one after
another, which blocks them for quite a long time.

Resolve this by replacing the special value ~0UL of the expires
parameter of fib6_run_gc() with an explicit "force" parameter that
chooses between spin_lock_bh() and spin_trylock_bh(), and call
fib6_run_gc() with force=false if gc_thresh is reached but max_size
is not.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 4 changed files with 13 additions and 16 deletions Side-by-side Diff

include/net/ip6_fib.h
... ... @@ -300,7 +300,7 @@
300 300 struct nl_info *info);
301 301  
302 302 extern void fib6_run_gc(unsigned long expires,
303   - struct net *net);
  303 + struct net *net, bool force);
304 304  
305 305 extern void fib6_gc_cleanup(void);
306 306  
... ... @@ -1632,19 +1632,16 @@
1632 1632  
1633 1633 static DEFINE_SPINLOCK(fib6_gc_lock);
1634 1634  
1635   -void fib6_run_gc(unsigned long expires, struct net *net)
  1635 +void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1636 1636 {
1637   - if (expires != ~0UL) {
  1637 + if (force) {
1638 1638 spin_lock_bh(&fib6_gc_lock);
1639   - gc_args.timeout = expires ? (int)expires :
1640   - net->ipv6.sysctl.ip6_rt_gc_interval;
1641   - } else {
1642   - if (!spin_trylock_bh(&fib6_gc_lock)) {
1643   - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1644   - return;
1645   - }
1646   - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
  1639 + } else if (!spin_trylock_bh(&fib6_gc_lock)) {
  1640 + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
  1641 + return;
1647 1642 }
  1643 + gc_args.timeout = expires ? (int)expires :
  1644 + net->ipv6.sysctl.ip6_rt_gc_interval;
1648 1645  
1649 1646 gc_args.more = icmp6_dst_gc();
1650 1647  
... ... @@ -1661,7 +1658,7 @@
1661 1658  
1662 1659 static void fib6_gc_timer_cb(unsigned long arg)
1663 1660 {
1664   - fib6_run_gc(0, (struct net *)arg);
  1661 + fib6_run_gc(0, (struct net *)arg, true);
1665 1662 }
1666 1663  
1667 1664 static int __net_init fib6_net_init(struct net *net)
... ... @@ -1576,7 +1576,7 @@
1576 1576 switch (event) {
1577 1577 case NETDEV_CHANGEADDR:
1578 1578 neigh_changeaddr(&nd_tbl, dev);
1579   - fib6_run_gc(~0UL, net);
  1579 + fib6_run_gc(0, net, false);
1580 1580 idev = in6_dev_get(dev);
1581 1581 if (!idev)
1582 1582 break;
... ... @@ -1586,7 +1586,7 @@
1586 1586 break;
1587 1587 case NETDEV_DOWN:
1588 1588 neigh_ifdown(&nd_tbl, dev);
1589   - fib6_run_gc(~0UL, net);
  1589 + fib6_run_gc(0, net, false);
1590 1590 break;
1591 1591 case NETDEV_NOTIFY_PEERS:
1592 1592 ndisc_send_unsol_na(dev);
... ... @@ -1326,7 +1326,7 @@
1326 1326 goto out;
1327 1327  
1328 1328 net->ipv6.ip6_rt_gc_expire++;
1329   - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
  1329 + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1330 1330 net->ipv6.ip6_rt_last_gc = now;
1331 1331 entries = dst_entries_get_slow(ops);
1332 1332 if (entries < ops->gc_thresh)
... ... @@ -2827,7 +2827,7 @@
2827 2827 net = (struct net *)ctl->extra1;
2828 2828 delay = net->ipv6.sysctl.flush_delay;
2829 2829 proc_dointvec(ctl, write, buffer, lenp, ppos);
2830   - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
  2830 + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2831 2831 return 0;
2832 2832 }
2833 2833