Commit 923f4902fefdf4e89b0fb32c4e069d4f57d704f5

Authored by Patrick McHardy
Committed by David S. Miller
1 parent 642d628b2c

[NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays

Replace preempt_{enable,disable} based RCU by proper use of the
RCU API and add missing rcu_read_lock/rcu_read_unlock calls in
all paths not obviously only used within packet process context
(nfnetlink_conntrack).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 6 changed files with 44 additions and 30 deletions Side-by-side Diff

include/net/netfilter/nf_conntrack_l3proto.h
... ... @@ -106,7 +106,7 @@
106 106 {
107 107 if (unlikely(l3proto >= AF_MAX))
108 108 return &nf_conntrack_l3proto_generic;
109   - return nf_ct_l3protos[l3proto];
  109 + return rcu_dereference(nf_ct_l3protos[l3proto]);
110 110 }
111 111  
112 112 #endif /*_NF_CONNTRACK_L3PROTO_H*/
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
... ... @@ -170,7 +170,9 @@
170 170 return -NF_ACCEPT;
171 171 }
172 172  
  173 + /* rcu_read_lock()ed by nf_hook_slow */
173 174 innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
  175 +
174 176 dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
175 177 /* Are they talking about one of our connections? */
176 178 if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
net/ipv4/netfilter/nf_nat_core.c
... ... @@ -429,6 +429,7 @@
429 429 struct icmphdr icmp;
430 430 struct iphdr ip;
431 431 } *inside;
  432 + struct nf_conntrack_l4proto *l4proto;
432 433 struct nf_conntrack_tuple inner, target;
433 434 int hdrlen = (*pskb)->nh.iph->ihl * 4;
434 435 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
435 436  
... ... @@ -464,16 +465,16 @@
464 465 DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
465 466 *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
466 467  
  468 + /* rcu_read_lock()ed by nf_hook_slow */
  469 + l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
  470 +
467 471 if (!nf_ct_get_tuple(*pskb,
468 472 (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
469 473 (*pskb)->nh.iph->ihl*4 +
470 474 sizeof(struct icmphdr) + inside->ip.ihl*4,
471 475 (u_int16_t)AF_INET,
472 476 inside->ip.protocol,
473   - &inner,
474   - l3proto,
475   - __nf_ct_l4proto_find((u_int16_t)PF_INET,
476   - inside->ip.protocol)))
  477 + &inner, l3proto, l4proto))
477 478 return 0;
478 479  
479 480 /* Change inner back to look like incoming packet. We do the
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
... ... @@ -182,6 +182,7 @@
182 182 return -NF_ACCEPT;
183 183 }
184 184  
  185 + /* rcu_read_lock()ed by nf_hook_slow */
185 186 inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum);
186 187  
187 188 /* Are they talking about one of our connections? */
net/netfilter/nf_conntrack_core.c
... ... @@ -332,13 +332,16 @@
332 332 /* To make sure we don't get any weird locking issues here:
333 333 * destroy_conntrack() MUST NOT be called with a write lock
334 334 * to nf_conntrack_lock!!! -HW */
  335 + rcu_read_lock();
335 336 l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
336 337 if (l3proto && l3proto->destroy)
337 338 l3proto->destroy(ct);
338 339  
339   - l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
  340 + l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
  341 + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
340 342 if (l4proto && l4proto->destroy)
341 343 l4proto->destroy(ct);
  344 + rcu_read_unlock();
342 345  
343 346 if (nf_conntrack_destroyed)
344 347 nf_conntrack_destroyed(ct);
345 348  
346 349  
... ... @@ -647,9 +650,14 @@
647 650 const struct nf_conntrack_tuple *repl)
648 651 {
649 652 struct nf_conntrack_l3proto *l3proto;
  653 + struct nf_conn *ct;
650 654  
  655 + rcu_read_lock();
651 656 l3proto = __nf_ct_l3proto_find(orig->src.l3num);
652   - return __nf_conntrack_alloc(orig, repl, l3proto, 0);
  657 + ct = __nf_conntrack_alloc(orig, repl, l3proto, 0);
  658 + rcu_read_unlock();
  659 +
  660 + return ct;
653 661 }
654 662 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
655 663  
656 664  
... ... @@ -817,7 +825,9 @@
817 825 return NF_ACCEPT;
818 826 }
819 827  
  828 + /* rcu_read_lock()ed by nf_hook_slow */
820 829 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
  830 +
821 831 if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
822 832 DEBUGP("not prepared to track yet or error occured\n");
823 833 return -ret;
... ... @@ -872,10 +882,15 @@
872 882 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
873 883 const struct nf_conntrack_tuple *orig)
874 884 {
875   - return nf_ct_invert_tuple(inverse, orig,
876   - __nf_ct_l3proto_find(orig->src.l3num),
877   - __nf_ct_l4proto_find(orig->src.l3num,
878   - orig->dst.protonum));
  885 + int ret;
  886 +
  887 + rcu_read_lock();
  888 + ret = nf_ct_invert_tuple(inverse, orig,
  889 + __nf_ct_l3proto_find(orig->src.l3num),
  890 + __nf_ct_l4proto_find(orig->src.l3num,
  891 + orig->dst.protonum));
  892 + rcu_read_unlock();
  893 + return ret;
879 894 }
880 895 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
881 896  
net/netfilter/nf_conntrack_proto.c
... ... @@ -66,7 +66,7 @@
66 66 if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
67 67 return &nf_conntrack_l4proto_generic;
68 68  
69   - return nf_ct_protos[l3proto][l4proto];
  69 + return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
70 70 }
71 71 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
72 72  
73 73  
... ... @@ -77,11 +77,11 @@
77 77 {
78 78 struct nf_conntrack_l4proto *p;
79 79  
80   - preempt_disable();
  80 + rcu_read_lock();
81 81 p = __nf_ct_l4proto_find(l3proto, l4proto);
82 82 if (!try_module_get(p->me))
83 83 p = &nf_conntrack_l4proto_generic;
84   - preempt_enable();
  84 + rcu_read_unlock();
85 85  
86 86 return p;
87 87 }
88 88  
... ... @@ -98,11 +98,11 @@
98 98 {
99 99 struct nf_conntrack_l3proto *p;
100 100  
101   - preempt_disable();
  101 + rcu_read_lock();
102 102 p = __nf_ct_l3proto_find(l3proto);
103 103 if (!try_module_get(p->me))
104 104 p = &nf_conntrack_l3proto_generic;
105   - preempt_enable();
  105 + rcu_read_unlock();
106 106  
107 107 return p;
108 108 }
109 109  
... ... @@ -137,10 +137,8 @@
137 137 {
138 138 struct nf_conntrack_l3proto *p;
139 139  
140   - preempt_disable();
  140 + /* rcu_read_lock not necessary since the caller holds a reference */
141 141 p = __nf_ct_l3proto_find(l3proto);
142   - preempt_enable();
143   -
144 142 module_put(p->me);
145 143 }
146 144 EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
... ... @@ -202,7 +200,7 @@
202 200 ret = -EBUSY;
203 201 goto out_unlock;
204 202 }
205   - nf_ct_l3protos[proto->l3proto] = proto;
  203 + rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
206 204 write_unlock_bh(&nf_conntrack_lock);
207 205  
208 206 ret = nf_ct_l3proto_register_sysctl(proto);
209 207  
210 208  
... ... @@ -233,14 +231,13 @@
233 231 goto out;
234 232 }
235 233  
236   - nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic;
  234 + rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
  235 + &nf_conntrack_l3proto_generic);
237 236 write_unlock_bh(&nf_conntrack_lock);
  237 + synchronize_rcu();
238 238  
239 239 nf_ct_l3proto_unregister_sysctl(proto);
240 240  
241   - /* Somebody could be still looking at the proto in bh. */
242   - synchronize_net();
243   -
244 241 /* Remove all contrack entries for this protocol */
245 242 nf_ct_iterate_cleanup(kill_l3proto, proto);
246 243  
... ... @@ -356,7 +353,7 @@
356 353 goto retry;
357 354 }
358 355  
359   - nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto;
  356 + rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
360 357 write_unlock_bh(&nf_conntrack_lock);
361 358  
362 359 ret = nf_ct_l4proto_register_sysctl(l4proto);
363 360  
364 361  
... ... @@ -392,14 +389,12 @@
392 389 ret = -EBUSY;
393 390 goto out;
394 391 }
395   - nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
396   - = &nf_conntrack_l4proto_generic;
  392 + rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
  393 + &nf_conntrack_l4proto_generic);
397 394 write_unlock_bh(&nf_conntrack_lock);
  395 + synchronize_rcu();
398 396  
399 397 nf_ct_l4proto_unregister_sysctl(l4proto);
400   -
401   - /* Somebody could be still looking at the proto in bh. */
402   - synchronize_net();
403 398  
404 399 /* Remove all contrack entries for this protocol */
405 400 nf_ct_iterate_cleanup(kill_l4proto, l4proto);