Commit 14deae41566b5cdd992c01d0069518ced5227c83
1 parent
eb4dea5853
Exists in
master
and in
4 other branches
ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.
Thanks to excellent diagnosis by Eduard Guzovsky. The core problem is that on a network with lots of active multicast traffic, the neighbour cache can fill up. If we try to allocate a new route and thus a neighbour cache entry, the bog-standard GC attempt the neighbour layer does is ineffective because route entries hold a reference to the existing neighbour entries and GC can only liberate entries with no references. IPv4 already has a way to handle this, by doing a route cache GC in such situations (when neigh attach returns -ENOBUFS). So simply mimic this on the IPv6 side. Tested-by: Eduard Guzovsky <eguzovsky@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 2 changed files with 49 additions and 7 deletions Side-by-side Diff
include/net/ndisc.h
net/ipv6/route.c
... | ... | @@ -627,6 +627,9 @@ |
627 | 627 | rt = ip6_rt_copy(ort); |
628 | 628 | |
629 | 629 | if (rt) { |
630 | + struct neighbour *neigh; | |
631 | + int attempts = !in_softirq(); | |
632 | + | |
630 | 633 | if (!(rt->rt6i_flags&RTF_GATEWAY)) { |
631 | 634 | if (rt->rt6i_dst.plen != 128 && |
632 | 635 | ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) |
633 | 636 | |
... | ... | @@ -646,8 +649,36 @@ |
646 | 649 | } |
647 | 650 | #endif |
648 | 651 | |
649 | - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | |
652 | + retry: | |
653 | + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | |
654 | + if (IS_ERR(neigh)) { | |
655 | + struct net *net = dev_net(rt->rt6i_dev); | |
656 | + int saved_rt_min_interval = | |
657 | + net->ipv6.sysctl.ip6_rt_gc_min_interval; | |
658 | + int saved_rt_elasticity = | |
659 | + net->ipv6.sysctl.ip6_rt_gc_elasticity; | |
650 | 660 | |
661 | + if (attempts-- > 0) { | |
662 | + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; | |
663 | + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; | |
664 | + | |
665 | + ip6_dst_gc(net->ipv6.ip6_dst_ops); | |
666 | + | |
667 | + net->ipv6.sysctl.ip6_rt_gc_elasticity = | |
668 | + saved_rt_elasticity; | |
669 | + net->ipv6.sysctl.ip6_rt_gc_min_interval = | |
670 | + saved_rt_min_interval; | |
671 | + goto retry; | |
672 | + } | |
673 | + | |
674 | + if (net_ratelimit()) | |
675 | + printk(KERN_WARNING | |
676 | + "Neighbour table overflow.\n"); | |
677 | + dst_free(&rt->u.dst); | |
678 | + return NULL; | |
679 | + } | |
680 | + rt->rt6i_nexthop = neigh; | |
681 | + | |
651 | 682 | } |
652 | 683 | |
653 | 684 | return rt; |
654 | 685 | |
... | ... | @@ -945,8 +976,11 @@ |
945 | 976 | dev_hold(dev); |
946 | 977 | if (neigh) |
947 | 978 | neigh_hold(neigh); |
948 | - else | |
979 | + else { | |
949 | 980 | neigh = ndisc_get_neigh(dev, addr); |
981 | + if (IS_ERR(neigh)) | |
982 | + neigh = NULL; | |
983 | + } | |
950 | 984 | |
951 | 985 | rt->rt6i_dev = dev; |
952 | 986 | rt->rt6i_idev = idev; |
... | ... | @@ -1887,6 +1921,7 @@ |
1887 | 1921 | { |
1888 | 1922 | struct net *net = dev_net(idev->dev); |
1889 | 1923 | struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); |
1924 | + struct neighbour *neigh; | |
1890 | 1925 | |
1891 | 1926 | if (rt == NULL) |
1892 | 1927 | return ERR_PTR(-ENOMEM); |
1893 | 1928 | |
1894 | 1929 | |
... | ... | @@ -1909,11 +1944,18 @@ |
1909 | 1944 | rt->rt6i_flags |= RTF_ANYCAST; |
1910 | 1945 | else |
1911 | 1946 | rt->rt6i_flags |= RTF_LOCAL; |
1912 | - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | |
1913 | - if (rt->rt6i_nexthop == NULL) { | |
1947 | + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); | |
1948 | + if (IS_ERR(neigh)) { | |
1914 | 1949 | dst_free(&rt->u.dst); |
1915 | - return ERR_PTR(-ENOMEM); | |
1950 | + | |
1951 | + /* We are casting this because that is the return | |
1952 | + * value type. But an errno encoded pointer is the | |
1953 | + * same regardless of the underlying pointer type, | |
1954 | + * and that's what we are returning. So this is OK. | |
1955 | + */ | |
1956 | + return (struct rt6_info *) neigh; | |
1916 | 1957 | } |
1958 | + rt->rt6i_nexthop = neigh; | |
1917 | 1959 | |
1918 | 1960 | ipv6_addr_copy(&rt->rt6i_dst.addr, addr); |
1919 | 1961 | rt->rt6i_dst.plen = 128; |