Commit f2c31e32b378a6653f8de606149d963baf11d7d3

Authored by Eric Dumazet
Committed by David S. Miller
1 parent 28f4881cbf

net: fix NULL dereferences in check_peer_redir()

Gergely Kalman reported crashes in check_peer_redir().

It appears commit f39925dbde778 (ipv4: Cache learned redirect
information in inetpeer.) added a race, leading to possible NULL ptr
dereference.

Since we can now change dst neighbour, we should make sure a reader can
safely use a neighbour.

Add RCU protection to dst neighbour, and make sure check_peer_redir()
can be called safely by different cpus in parallel.

As neighbours are already freed after one RCU grace period, this patch
should not add typical RCU penalty (cache cold effects)

Many thanks to Gergely for providing a pretty report pointing to the
bug.

Reported-by: Gergely Kalman <synapse@hippy.csoma.elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 7 changed files with 67 additions and 26 deletions Side-by-side Diff

... ... @@ -37,7 +37,7 @@
37 37 unsigned long _metrics;
38 38 unsigned long expires;
39 39 struct dst_entry *path;
40   - struct neighbour *_neighbour;
  40 + struct neighbour __rcu *_neighbour;
41 41 #ifdef CONFIG_XFRM
42 42 struct xfrm_state *xfrm;
43 43 #else
44 44  
45 45  
... ... @@ -88,12 +88,17 @@
88 88  
89 89 static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst)
90 90 {
91   - return dst->_neighbour;
  91 + return rcu_dereference(dst->_neighbour);
92 92 }
93 93  
  94 +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst)
  95 +{
  96 + return rcu_dereference_raw(dst->_neighbour);
  97 +}
  98 +
94 99 static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh)
95 100 {
96   - dst->_neighbour = neigh;
  101 + rcu_assign_pointer(dst->_neighbour, neigh);
97 102 }
98 103  
99 104 extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
100 105  
... ... @@ -382,8 +387,12 @@
382 387 static inline void dst_confirm(struct dst_entry *dst)
383 388 {
384 389 if (dst) {
385   - struct neighbour *n = dst_get_neighbour(dst);
  390 + struct neighbour *n;
  391 +
  392 + rcu_read_lock();
  393 + n = dst_get_neighbour(dst);
386 394 neigh_confirm(n);
  395 + rcu_read_unlock();
387 396 }
388 397 }
389 398  
net/ipv4/ip_output.c
... ... @@ -204,9 +204,15 @@
204 204 skb = skb2;
205 205 }
206 206  
  207 + rcu_read_lock();
207 208 neigh = dst_get_neighbour(dst);
208   - if (neigh)
209   - return neigh_output(neigh, skb);
  209 + if (neigh) {
  210 + int res = neigh_output(neigh, skb);
  211 +
  212 + rcu_read_unlock();
  213 + return res;
  214 + }
  215 + rcu_read_unlock();
210 216  
211 217 if (net_ratelimit())
212 218 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
... ... @@ -1628,16 +1628,18 @@
1628 1628 {
1629 1629 struct rtable *rt = (struct rtable *) dst;
1630 1630 __be32 orig_gw = rt->rt_gateway;
1631   - struct neighbour *n;
  1631 + struct neighbour *n, *old_n;
1632 1632  
1633 1633 dst_confirm(&rt->dst);
1634 1634  
1635   - neigh_release(dst_get_neighbour(&rt->dst));
1636   - dst_set_neighbour(&rt->dst, NULL);
1637   -
1638 1635 rt->rt_gateway = peer->redirect_learned.a4;
1639   - rt_bind_neighbour(rt);
1640   - n = dst_get_neighbour(&rt->dst);
  1636 +
  1637 + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
  1638 + if (IS_ERR(n))
  1639 + return PTR_ERR(n);
  1640 + old_n = xchg(&rt->dst._neighbour, n);
  1641 + if (old_n)
  1642 + neigh_release(old_n);
1641 1643 if (!n || !(n->nud_state & NUD_VALID)) {
1642 1644 if (n)
1643 1645 neigh_event_send(n, NULL);
... ... @@ -656,7 +656,7 @@
656 656 * layer address of our nexhop router
657 657 */
658 658  
659   - if (dst_get_neighbour(&rt->dst) == NULL)
  659 + if (dst_get_neighbour_raw(&rt->dst) == NULL)
660 660 ifa->flags &= ~IFA_F_OPTIMISTIC;
661 661  
662 662 ifa->idev = idev;
... ... @@ -1455,7 +1455,7 @@
1455 1455 RT6_TRACE("aging clone %p\n", rt);
1456 1456 return -1;
1457 1457 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1458   - (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) {
  1458 + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
1459 1459 RT6_TRACE("purging route %p via non-router but gateway\n",
1460 1460 rt);
1461 1461 return -1;
net/ipv6/ip6_output.c
... ... @@ -135,10 +135,15 @@
135 135 skb->len);
136 136 }
137 137  
  138 + rcu_read_lock();
138 139 neigh = dst_get_neighbour(dst);
139   - if (neigh)
140   - return neigh_output(neigh, skb);
  140 + if (neigh) {
  141 + int res = neigh_output(neigh, skb);
141 142  
  143 + rcu_read_unlock();
  144 + return res;
  145 + }
  146 + rcu_read_unlock();
142 147 IP6_INC_STATS_BH(dev_net(dst->dev),
143 148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 149 kfree_skb(skb);
145 150  
... ... @@ -975,12 +980,14 @@
975 980 * dst entry and replace it instead with the
976 981 * dst entry of the nexthop router
977 982 */
  983 + rcu_read_lock();
978 984 n = dst_get_neighbour(*dst);
979 985 if (n && !(n->nud_state & NUD_VALID)) {
980 986 struct inet6_ifaddr *ifp;
981 987 struct flowi6 fl_gw6;
982 988 int redirect;
983 989  
  990 + rcu_read_unlock();
984 991 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
985 992 (*dst)->dev, 1);
986 993  
... ... @@ -1000,6 +1007,8 @@
1000 1007 if ((err = (*dst)->error))
1001 1008 goto out_err_release;
1002 1009 }
  1010 + } else {
  1011 + rcu_read_unlock();
1003 1012 }
1004 1013 #endif
1005 1014  
... ... @@ -364,7 +364,7 @@
364 364 #ifdef CONFIG_IPV6_ROUTER_PREF
365 365 static void rt6_probe(struct rt6_info *rt)
366 366 {
367   - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
  367 + struct neighbour *neigh;
368 368 /*
369 369 * Okay, this does not seem to be appropriate
370 370 * for now, however, we need to check if it
371 371  
... ... @@ -373,8 +373,10 @@
373 373 * Router Reachability Probe MUST be rate-limited
374 374 * to no more than one per minute.
375 375 */
  376 + rcu_read_lock();
  377 + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
376 378 if (!neigh || (neigh->nud_state & NUD_VALID))
377   - return;
  379 + goto out;
378 380 read_lock_bh(&neigh->lock);
379 381 if (!(neigh->nud_state & NUD_VALID) &&
380 382 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
381 383  
... ... @@ -387,8 +389,11 @@
387 389 target = (struct in6_addr *)&neigh->primary_key;
388 390 addrconf_addr_solict_mult(target, &mcaddr);
389 391 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
390   - } else
  392 + } else {
391 393 read_unlock_bh(&neigh->lock);
  394 + }
  395 +out:
  396 + rcu_read_unlock();
392 397 }
393 398 #else
394 399 static inline void rt6_probe(struct rt6_info *rt)
395 400  
... ... @@ -412,8 +417,11 @@
412 417  
413 418 static inline int rt6_check_neigh(struct rt6_info *rt)
414 419 {
415   - struct neighbour *neigh = dst_get_neighbour(&rt->dst);
  420 + struct neighbour *neigh;
416 421 int m;
  422 +
  423 + rcu_read_lock();
  424 + neigh = dst_get_neighbour(&rt->dst);
417 425 if (rt->rt6i_flags & RTF_NONEXTHOP ||
418 426 !(rt->rt6i_flags & RTF_GATEWAY))
419 427 m = 1;
... ... @@ -430,6 +438,7 @@
430 438 read_unlock_bh(&neigh->lock);
431 439 } else
432 440 m = 0;
  441 + rcu_read_unlock();
433 442 return m;
434 443 }
435 444  
... ... @@ -769,7 +778,7 @@
769 778 rt->rt6i_dst.plen = 128;
770 779 rt->rt6i_flags |= RTF_CACHE;
771 780 rt->dst.flags |= DST_HOST;
772   - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
  781 + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
773 782 }
774 783 return rt;
775 784 }
... ... @@ -803,7 +812,7 @@
803 812 dst_hold(&rt->dst);
804 813 read_unlock_bh(&table->tb6_lock);
805 814  
806   - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
  815 + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
807 816 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
808 817 else if (!(rt->dst.flags & DST_HOST))
809 818 nrt = rt6_alloc_clone(rt, &fl6->daddr);
... ... @@ -1587,7 +1596,7 @@
1587 1596 dst_confirm(&rt->dst);
1588 1597  
1589 1598 /* Duplicate redirect: silently ignore. */
1590   - if (neigh == dst_get_neighbour(&rt->dst))
  1599 + if (neigh == dst_get_neighbour_raw(&rt->dst))
1591 1600 goto out;
1592 1601  
1593 1602 nrt = ip6_rt_copy(rt, dest);
... ... @@ -1682,7 +1691,7 @@
1682 1691 1. It is connected route. Action: COW
1683 1692 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1684 1693 */
1685   - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
  1694 + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1686 1695 nrt = rt6_alloc_cow(rt, daddr, saddr);
1687 1696 else
1688 1697 nrt = rt6_alloc_clone(rt, daddr);
... ... @@ -2326,6 +2335,7 @@
2326 2335 struct nlmsghdr *nlh;
2327 2336 long expires;
2328 2337 u32 table;
  2338 + struct neighbour *n;
2329 2339  
2330 2340 if (prefix) { /* user wants prefix routes only */
2331 2341 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
... ... @@ -2414,8 +2424,11 @@
2414 2424 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2415 2425 goto nla_put_failure;
2416 2426  
2417   - if (dst_get_neighbour(&rt->dst))
2418   - NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key);
  2427 + rcu_read_lock();
  2428 + n = dst_get_neighbour(&rt->dst);
  2429 + if (n)
  2430 + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
  2431 + rcu_read_unlock();
2419 2432  
2420 2433 if (rt->dst.dev)
2421 2434 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2422 2435  
... ... @@ -2608,12 +2621,14 @@
2608 2621 #else
2609 2622 seq_puts(m, "00000000000000000000000000000000 00 ");
2610 2623 #endif
  2624 + rcu_read_lock();
2611 2625 n = dst_get_neighbour(&rt->dst);
2612 2626 if (n) {
2613 2627 seq_printf(m, "%pi6", n->primary_key);
2614 2628 } else {
2615 2629 seq_puts(m, "00000000000000000000000000000000");
2616 2630 }
  2631 + rcu_read_unlock();
2617 2632 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2618 2633 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2619 2634 rt->dst.__use, rt->rt6i_flags,