Commit 923f4902fefdf4e89b0fb32c4e069d4f57d704f5
Committed by
David S. Miller
1 parent
642d628b2c
[NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays
Replace preempt_{enable,disable} based RCU by proper use of the RCU API and add missing rcu_read_lock/rcu_read_unlock calls in all paths not obviously only used within packet process context (nfnetlink_conntrack). Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 6 changed files with 44 additions and 30 deletions Side-by-side Diff
include/net/netfilter/nf_conntrack_l3proto.h
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
... | ... | @@ -170,7 +170,9 @@ |
170 | 170 | return -NF_ACCEPT; |
171 | 171 | } |
172 | 172 | |
173 | + /* rcu_read_lock()ed by nf_hook_slow */ | |
173 | 174 | innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); |
175 | + | |
174 | 176 | dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); |
175 | 177 | /* Are they talking about one of our connections? */ |
176 | 178 | if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, |
net/ipv4/netfilter/nf_nat_core.c
... | ... | @@ -429,6 +429,7 @@ |
429 | 429 | struct icmphdr icmp; |
430 | 430 | struct iphdr ip; |
431 | 431 | } *inside; |
432 | + struct nf_conntrack_l4proto *l4proto; | |
432 | 433 | struct nf_conntrack_tuple inner, target; |
433 | 434 | int hdrlen = (*pskb)->nh.iph->ihl * 4; |
434 | 435 | enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); |
435 | 436 | |
... | ... | @@ -464,16 +465,16 @@ |
464 | 465 | DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", |
465 | 466 | *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); |
466 | 467 | |
468 | + /* rcu_read_lock()ed by nf_hook_slow */ | |
469 | + l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | |
470 | + | |
467 | 471 | if (!nf_ct_get_tuple(*pskb, |
468 | 472 | (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr), |
469 | 473 | (*pskb)->nh.iph->ihl*4 + |
470 | 474 | sizeof(struct icmphdr) + inside->ip.ihl*4, |
471 | 475 | (u_int16_t)AF_INET, |
472 | 476 | inside->ip.protocol, |
473 | - &inner, | |
474 | - l3proto, | |
475 | - __nf_ct_l4proto_find((u_int16_t)PF_INET, | |
476 | - inside->ip.protocol))) | |
477 | + &inner, l3proto, l4proto)) | |
477 | 478 | return 0; |
478 | 479 | |
479 | 480 | /* Change inner back to look like incoming packet. We do the |
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/netfilter/nf_conntrack_core.c
... | ... | @@ -332,13 +332,16 @@ |
332 | 332 | /* To make sure we don't get any weird locking issues here: |
333 | 333 | * destroy_conntrack() MUST NOT be called with a write lock |
334 | 334 | * to nf_conntrack_lock!!! -HW */ |
335 | + rcu_read_lock(); | |
335 | 336 | l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); |
336 | 337 | if (l3proto && l3proto->destroy) |
337 | 338 | l3proto->destroy(ct); |
338 | 339 | |
339 | - l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); | |
340 | + l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, | |
341 | + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); | |
340 | 342 | if (l4proto && l4proto->destroy) |
341 | 343 | l4proto->destroy(ct); |
344 | + rcu_read_unlock(); | |
342 | 345 | |
343 | 346 | if (nf_conntrack_destroyed) |
344 | 347 | nf_conntrack_destroyed(ct); |
345 | 348 | |
346 | 349 | |
... | ... | @@ -647,9 +650,14 @@ |
647 | 650 | const struct nf_conntrack_tuple *repl) |
648 | 651 | { |
649 | 652 | struct nf_conntrack_l3proto *l3proto; |
653 | + struct nf_conn *ct; | |
650 | 654 | |
655 | + rcu_read_lock(); | |
651 | 656 | l3proto = __nf_ct_l3proto_find(orig->src.l3num); |
652 | - return __nf_conntrack_alloc(orig, repl, l3proto, 0); | |
657 | + ct = __nf_conntrack_alloc(orig, repl, l3proto, 0); | |
658 | + rcu_read_unlock(); | |
659 | + | |
660 | + return ct; | |
653 | 661 | } |
654 | 662 | EXPORT_SYMBOL_GPL(nf_conntrack_alloc); |
655 | 663 | |
656 | 664 | |
... | ... | @@ -817,7 +825,9 @@ |
817 | 825 | return NF_ACCEPT; |
818 | 826 | } |
819 | 827 | |
828 | + /* rcu_read_lock()ed by nf_hook_slow */ | |
820 | 829 | l3proto = __nf_ct_l3proto_find((u_int16_t)pf); |
830 | + | |
821 | 831 | if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { |
822 | 832 | DEBUGP("not prepared to track yet or error occured\n"); |
823 | 833 | return -ret; |
... | ... | @@ -872,10 +882,15 @@ |
872 | 882 | int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, |
873 | 883 | const struct nf_conntrack_tuple *orig) |
874 | 884 | { |
875 | - return nf_ct_invert_tuple(inverse, orig, | |
876 | - __nf_ct_l3proto_find(orig->src.l3num), | |
877 | - __nf_ct_l4proto_find(orig->src.l3num, | |
878 | - orig->dst.protonum)); | |
885 | + int ret; | |
886 | + | |
887 | + rcu_read_lock(); | |
888 | + ret = nf_ct_invert_tuple(inverse, orig, | |
889 | + __nf_ct_l3proto_find(orig->src.l3num), | |
890 | + __nf_ct_l4proto_find(orig->src.l3num, | |
891 | + orig->dst.protonum)); | |
892 | + rcu_read_unlock(); | |
893 | + return ret; | |
879 | 894 | } |
880 | 895 | EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); |
881 | 896 |
net/netfilter/nf_conntrack_proto.c
... | ... | @@ -66,7 +66,7 @@ |
66 | 66 | if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL)) |
67 | 67 | return &nf_conntrack_l4proto_generic; |
68 | 68 | |
69 | - return nf_ct_protos[l3proto][l4proto]; | |
69 | + return rcu_dereference(nf_ct_protos[l3proto][l4proto]); | |
70 | 70 | } |
71 | 71 | EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); |
72 | 72 | |
73 | 73 | |
... | ... | @@ -77,11 +77,11 @@ |
77 | 77 | { |
78 | 78 | struct nf_conntrack_l4proto *p; |
79 | 79 | |
80 | - preempt_disable(); | |
80 | + rcu_read_lock(); | |
81 | 81 | p = __nf_ct_l4proto_find(l3proto, l4proto); |
82 | 82 | if (!try_module_get(p->me)) |
83 | 83 | p = &nf_conntrack_l4proto_generic; |
84 | - preempt_enable(); | |
84 | + rcu_read_unlock(); | |
85 | 85 | |
86 | 86 | return p; |
87 | 87 | } |
88 | 88 | |
... | ... | @@ -98,11 +98,11 @@ |
98 | 98 | { |
99 | 99 | struct nf_conntrack_l3proto *p; |
100 | 100 | |
101 | - preempt_disable(); | |
101 | + rcu_read_lock(); | |
102 | 102 | p = __nf_ct_l3proto_find(l3proto); |
103 | 103 | if (!try_module_get(p->me)) |
104 | 104 | p = &nf_conntrack_l3proto_generic; |
105 | - preempt_enable(); | |
105 | + rcu_read_unlock(); | |
106 | 106 | |
107 | 107 | return p; |
108 | 108 | } |
109 | 109 | |
... | ... | @@ -137,10 +137,8 @@ |
137 | 137 | { |
138 | 138 | struct nf_conntrack_l3proto *p; |
139 | 139 | |
140 | - preempt_disable(); | |
140 | + /* rcu_read_lock not necessary since the caller holds a reference */ | |
141 | 141 | p = __nf_ct_l3proto_find(l3proto); |
142 | - preempt_enable(); | |
143 | - | |
144 | 142 | module_put(p->me); |
145 | 143 | } |
146 | 144 | EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); |
... | ... | @@ -202,7 +200,7 @@ |
202 | 200 | ret = -EBUSY; |
203 | 201 | goto out_unlock; |
204 | 202 | } |
205 | - nf_ct_l3protos[proto->l3proto] = proto; | |
203 | + rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); | |
206 | 204 | write_unlock_bh(&nf_conntrack_lock); |
207 | 205 | |
208 | 206 | ret = nf_ct_l3proto_register_sysctl(proto); |
209 | 207 | |
210 | 208 | |
... | ... | @@ -233,14 +231,13 @@ |
233 | 231 | goto out; |
234 | 232 | } |
235 | 233 | |
236 | - nf_ct_l3protos[proto->l3proto] = &nf_conntrack_l3proto_generic; | |
234 | + rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], | |
235 | + &nf_conntrack_l3proto_generic); | |
237 | 236 | write_unlock_bh(&nf_conntrack_lock); |
237 | + synchronize_rcu(); | |
238 | 238 | |
239 | 239 | nf_ct_l3proto_unregister_sysctl(proto); |
240 | 240 | |
241 | - /* Somebody could be still looking at the proto in bh. */ | |
242 | - synchronize_net(); | |
243 | - | |
244 | 241 | /* Remove all contrack entries for this protocol */ |
245 | 242 | nf_ct_iterate_cleanup(kill_l3proto, proto); |
246 | 243 | |
... | ... | @@ -356,7 +353,7 @@ |
356 | 353 | goto retry; |
357 | 354 | } |
358 | 355 | |
359 | - nf_ct_protos[l4proto->l3proto][l4proto->l4proto] = l4proto; | |
356 | + rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); | |
360 | 357 | write_unlock_bh(&nf_conntrack_lock); |
361 | 358 | |
362 | 359 | ret = nf_ct_l4proto_register_sysctl(l4proto); |
363 | 360 | |
364 | 361 | |
... | ... | @@ -392,14 +389,12 @@ |
392 | 389 | ret = -EBUSY; |
393 | 390 | goto out; |
394 | 391 | } |
395 | - nf_ct_protos[l4proto->l3proto][l4proto->l4proto] | |
396 | - = &nf_conntrack_l4proto_generic; | |
392 | + rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], | |
393 | + &nf_conntrack_l4proto_generic); | |
397 | 394 | write_unlock_bh(&nf_conntrack_lock); |
395 | + synchronize_rcu(); | |
398 | 396 | |
399 | 397 | nf_ct_l4proto_unregister_sysctl(l4proto); |
400 | - | |
401 | - /* Somebody could be still looking at the proto in bh. */ | |
402 | - synchronize_net(); | |
403 | 398 | |
404 | 399 | /* Remove all contrack entries for this protocol */ |
405 | 400 | nf_ct_iterate_cleanup(kill_l4proto, l4proto); |