Commit ca7433df3a672efc88e08222cfa4b3aa965ca324

Authored by Jesper Dangaard Brouer
Committed by Pablo Neira Ayuso
1 parent e1b207dac1

netfilter: conntrack: separate expect locking from nf_conntrack_lock

Netfilter expectations are protected with the same lock as conntrack
entries (nf_conntrack_lock).  This patch splits out expectation locking
to use its own lock (nf_conntrack_expect_lock).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Showing 7 changed files with 79 additions and 69 deletions Side-by-side Diff

include/net/netfilter/nf_conntrack_core.h
... ... @@ -79,5 +79,7 @@
79 79  
80 80 extern spinlock_t nf_conntrack_lock ;
81 81  
  82 +extern spinlock_t nf_conntrack_expect_lock;
  83 +
82 84 #endif /* _NF_CONNTRACK_CORE_H */
net/netfilter/nf_conntrack_core.c
... ... @@ -63,6 +63,9 @@
63 63 DEFINE_SPINLOCK(nf_conntrack_lock);
64 64 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
65 65  
  66 +__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
  67 +EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
  68 +
66 69 unsigned int nf_conntrack_htable_size __read_mostly;
67 70 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
68 71  
... ... @@ -247,9 +250,6 @@
247 250 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
248 251 NF_CT_ASSERT(!timer_pending(&ct->timeout));
249 252  
250   - /* To make sure we don't get any weird locking issues here:
251   - * destroy_conntrack() MUST NOT be called with a write lock
252   - * to nf_conntrack_lock!!! -HW */
253 253 rcu_read_lock();
254 254 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
255 255 if (l4proto && l4proto->destroy)
256 256  
257 257  
... ... @@ -257,17 +257,18 @@
257 257  
258 258 rcu_read_unlock();
259 259  
260   - spin_lock_bh(&nf_conntrack_lock);
  260 + local_bh_disable();
261 261 /* Expectations will have been removed in clean_from_lists,
262 262 * except TFTP can create an expectation on the first packet,
263 263 * before connection is in the list, so we need to clean here,
264   - * too. */
  264 + * too.
  265 + */
265 266 nf_ct_remove_expectations(ct);
266 267  
267 268 nf_ct_del_from_dying_or_unconfirmed_list(ct);
268 269  
269 270 NF_CT_STAT_INC(net, delete);
270   - spin_unlock_bh(&nf_conntrack_lock);
  271 + local_bh_enable();
271 272  
272 273 if (ct->master)
273 274 nf_ct_put(ct->master);
... ... @@ -851,7 +852,7 @@
851 852 struct nf_conn_help *help;
852 853 struct nf_conntrack_tuple repl_tuple;
853 854 struct nf_conntrack_ecache *ecache;
854   - struct nf_conntrack_expect *exp;
  855 + struct nf_conntrack_expect *exp = NULL;
855 856 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
856 857 struct nf_conn_timeout *timeout_ext;
857 858 unsigned int *timeouts;
858 859  
859 860  
860 861  
... ... @@ -895,30 +896,35 @@
895 896 ecache ? ecache->expmask : 0,
896 897 GFP_ATOMIC);
897 898  
898   - spin_lock_bh(&nf_conntrack_lock);
899   - exp = nf_ct_find_expectation(net, zone, tuple);
900   - if (exp) {
901   - pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
902   - ct, exp);
903   - /* Welcome, Mr. Bond. We've been expecting you... */
904   - __set_bit(IPS_EXPECTED_BIT, &ct->status);
905   - /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
906   - ct->master = exp->master;
907   - if (exp->helper) {
908   - help = nf_ct_helper_ext_add(ct, exp->helper,
909   - GFP_ATOMIC);
910   - if (help)
911   - rcu_assign_pointer(help->helper, exp->helper);
912   - }
  899 + local_bh_disable();
  900 + if (net->ct.expect_count) {
  901 + spin_lock(&nf_conntrack_expect_lock);
  902 + exp = nf_ct_find_expectation(net, zone, tuple);
  903 + if (exp) {
  904 + pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
  905 + ct, exp);
  906 + /* Welcome, Mr. Bond. We've been expecting you... */
  907 + __set_bit(IPS_EXPECTED_BIT, &ct->status);
  908 + /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
  909 + ct->master = exp->master;
  910 + if (exp->helper) {
  911 + help = nf_ct_helper_ext_add(ct, exp->helper,
  912 + GFP_ATOMIC);
  913 + if (help)
  914 + rcu_assign_pointer(help->helper, exp->helper);
  915 + }
913 916  
914 917 #ifdef CONFIG_NF_CONNTRACK_MARK
915   - ct->mark = exp->master->mark;
  918 + ct->mark = exp->master->mark;
916 919 #endif
917 920 #ifdef CONFIG_NF_CONNTRACK_SECMARK
918   - ct->secmark = exp->master->secmark;
  921 + ct->secmark = exp->master->secmark;
919 922 #endif
920   - NF_CT_STAT_INC(net, expect_new);
921   - } else {
  923 + NF_CT_STAT_INC(net, expect_new);
  924 + }
  925 + spin_unlock(&nf_conntrack_expect_lock);
  926 + }
  927 + if (!exp) {
922 928 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
923 929 NF_CT_STAT_INC(net, new);
924 930 }
... ... @@ -927,7 +933,7 @@
927 933 nf_conntrack_get(&ct->ct_general);
928 934 nf_ct_add_to_unconfirmed_list(ct);
929 935  
930   - spin_unlock_bh(&nf_conntrack_lock);
  936 + local_bh_enable();
931 937  
932 938 if (exp) {
933 939 if (exp->expectfn)
net/netfilter/nf_conntrack_expect.c
... ... @@ -66,9 +66,9 @@
66 66 {
67 67 struct nf_conntrack_expect *exp = (void *)ul_expect;
68 68  
69   - spin_lock_bh(&nf_conntrack_lock);
  69 + spin_lock_bh(&nf_conntrack_expect_lock);
70 70 nf_ct_unlink_expect(exp);
71   - spin_unlock_bh(&nf_conntrack_lock);
  71 + spin_unlock_bh(&nf_conntrack_expect_lock);
72 72 nf_ct_expect_put(exp);
73 73 }
74 74  
75 75  
... ... @@ -191,12 +191,14 @@
191 191 if (!help)
192 192 return;
193 193  
  194 + spin_lock_bh(&nf_conntrack_expect_lock);
194 195 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
195 196 if (del_timer(&exp->timeout)) {
196 197 nf_ct_unlink_expect(exp);
197 198 nf_ct_expect_put(exp);
198 199 }
199 200 }
  201 + spin_unlock_bh(&nf_conntrack_expect_lock);
200 202 }
201 203 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
202 204  
203 205  
... ... @@ -231,12 +233,12 @@
231 233 /* Generally a bad idea to call this: could have matched already. */
232 234 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
233 235 {
234   - spin_lock_bh(&nf_conntrack_lock);
  236 + spin_lock_bh(&nf_conntrack_expect_lock);
235 237 if (del_timer(&exp->timeout)) {
236 238 nf_ct_unlink_expect(exp);
237 239 nf_ct_expect_put(exp);
238 240 }
239   - spin_unlock_bh(&nf_conntrack_lock);
  241 + spin_unlock_bh(&nf_conntrack_expect_lock);
240 242 }
241 243 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
242 244  
... ... @@ -349,7 +351,7 @@
349 351 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
350 352 (unsigned long)exp);
351 353 helper = rcu_dereference_protected(master_help->helper,
352   - lockdep_is_held(&nf_conntrack_lock));
  354 + lockdep_is_held(&nf_conntrack_expect_lock));
353 355 if (helper) {
354 356 exp->timeout.expires = jiffies +
355 357 helper->expect_policy[exp->class].timeout * HZ;
... ... @@ -409,7 +411,7 @@
409 411 }
410 412 /* Will be over limit? */
411 413 helper = rcu_dereference_protected(master_help->helper,
412   - lockdep_is_held(&nf_conntrack_lock));
  414 + lockdep_is_held(&nf_conntrack_expect_lock));
413 415 if (helper) {
414 416 p = &helper->expect_policy[expect->class];
415 417 if (p->max_expected &&
... ... @@ -436,7 +438,7 @@
436 438 {
437 439 int ret;
438 440  
439   - spin_lock_bh(&nf_conntrack_lock);
  441 + spin_lock_bh(&nf_conntrack_expect_lock);
440 442 ret = __nf_ct_expect_check(expect);
441 443 if (ret <= 0)
442 444 goto out;
443 445  
... ... @@ -444,11 +446,11 @@
444 446 ret = nf_ct_expect_insert(expect);
445 447 if (ret < 0)
446 448 goto out;
447   - spin_unlock_bh(&nf_conntrack_lock);
  449 + spin_unlock_bh(&nf_conntrack_expect_lock);
448 450 nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
449 451 return ret;
450 452 out:
451   - spin_unlock_bh(&nf_conntrack_lock);
  453 + spin_unlock_bh(&nf_conntrack_expect_lock);
452 454 return ret;
453 455 }
454 456 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
net/netfilter/nf_conntrack_h323_main.c
... ... @@ -1476,7 +1476,7 @@
1476 1476 nf_ct_refresh(ct, skb, info->timeout * HZ);
1477 1477  
1478 1478 /* Set expect timeout */
1479   - spin_lock_bh(&nf_conntrack_lock);
  1479 + spin_lock_bh(&nf_conntrack_expect_lock);
1480 1480 exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3,
1481 1481 info->sig_port[!dir]);
1482 1482 if (exp) {
... ... @@ -1486,7 +1486,7 @@
1486 1486 nf_ct_dump_tuple(&exp->tuple);
1487 1487 set_expect_timeout(exp, info->timeout);
1488 1488 }
1489   - spin_unlock_bh(&nf_conntrack_lock);
  1489 + spin_unlock_bh(&nf_conntrack_expect_lock);
1490 1490 }
1491 1491  
1492 1492 return 0;
net/netfilter/nf_conntrack_helper.c
... ... @@ -250,16 +250,14 @@
250 250 }
251 251 EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
252 252  
  253 +/* appropiate ct lock protecting must be taken by caller */
253 254 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
254 255 const struct nf_conntrack_helper *me)
255 256 {
256 257 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
257 258 struct nf_conn_help *help = nfct_help(ct);
258 259  
259   - if (help && rcu_dereference_protected(
260   - help->helper,
261   - lockdep_is_held(&nf_conntrack_lock)
262   - ) == me) {
  260 + if (help && rcu_dereference_raw(help->helper) == me) {
263 261 nf_conntrack_event(IPCT_HELPER, ct);
264 262 RCU_INIT_POINTER(help->helper, NULL);
265 263 }
266 264  
267 265  
268 266  
... ... @@ -284,17 +282,17 @@
284 282  
285 283 void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n)
286 284 {
287   - spin_lock_bh(&nf_conntrack_lock);
  285 + spin_lock_bh(&nf_conntrack_expect_lock);
288 286 list_add_rcu(&n->head, &nf_ct_helper_expectfn_list);
289   - spin_unlock_bh(&nf_conntrack_lock);
  287 + spin_unlock_bh(&nf_conntrack_expect_lock);
290 288 }
291 289 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register);
292 290  
293 291 void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
294 292 {
295   - spin_lock_bh(&nf_conntrack_lock);
  293 + spin_lock_bh(&nf_conntrack_expect_lock);
296 294 list_del_rcu(&n->head);
297   - spin_unlock_bh(&nf_conntrack_lock);
  295 + spin_unlock_bh(&nf_conntrack_expect_lock);
298 296 }
299 297 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
300 298  
301 299  
... ... @@ -399,13 +397,14 @@
399 397 int cpu;
400 398  
401 399 /* Get rid of expectations */
  400 + spin_lock_bh(&nf_conntrack_expect_lock);
402 401 for (i = 0; i < nf_ct_expect_hsize; i++) {
403 402 hlist_for_each_entry_safe(exp, next,
404 403 &net->ct.expect_hash[i], hnode) {
405 404 struct nf_conn_help *help = nfct_help(exp->master);
406 405 if ((rcu_dereference_protected(
407 406 help->helper,
408   - lockdep_is_held(&nf_conntrack_lock)
  407 + lockdep_is_held(&nf_conntrack_expect_lock)
409 408 ) == me || exp->helper == me) &&
410 409 del_timer(&exp->timeout)) {
411 410 nf_ct_unlink_expect(exp);
... ... @@ -413,6 +412,7 @@
413 412 }
414 413 }
415 414 }
  415 + spin_unlock_bh(&nf_conntrack_expect_lock);
416 416  
417 417 /* Get rid of expecteds, set helpers to NULL. */
418 418 for_each_possible_cpu(cpu) {
419 419  
... ... @@ -423,10 +423,12 @@
423 423 unhelp(h, me);
424 424 spin_unlock_bh(&pcpu->lock);
425 425 }
  426 + spin_lock_bh(&nf_conntrack_lock);
426 427 for (i = 0; i < net->ct.htable_size; i++) {
427 428 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
428 429 unhelp(h, me);
429 430 }
  431 + spin_unlock_bh(&nf_conntrack_lock);
430 432 }
431 433  
432 434 void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
433 435  
... ... @@ -444,10 +446,8 @@
444 446 synchronize_rcu();
445 447  
446 448 rtnl_lock();
447   - spin_lock_bh(&nf_conntrack_lock);
448 449 for_each_net(net)
449 450 __nf_conntrack_helper_unregister(me, net);
450   - spin_unlock_bh(&nf_conntrack_lock);
451 451 rtnl_unlock();
452 452 }
453 453 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
net/netfilter/nf_conntrack_netlink.c
... ... @@ -1376,14 +1376,14 @@
1376 1376 nf_ct_protonum(ct));
1377 1377 if (helper == NULL) {
1378 1378 #ifdef CONFIG_MODULES
1379   - spin_unlock_bh(&nf_conntrack_lock);
  1379 + spin_unlock_bh(&nf_conntrack_expect_lock);
1380 1380  
1381 1381 if (request_module("nfct-helper-%s", helpname) < 0) {
1382   - spin_lock_bh(&nf_conntrack_lock);
  1382 + spin_lock_bh(&nf_conntrack_expect_lock);
1383 1383 return -EOPNOTSUPP;
1384 1384 }
1385 1385  
1386   - spin_lock_bh(&nf_conntrack_lock);
  1386 + spin_lock_bh(&nf_conntrack_expect_lock);
1387 1387 helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
1388 1388 nf_ct_protonum(ct));
1389 1389 if (helper)
1390 1390  
... ... @@ -1821,9 +1821,9 @@
1821 1821 err = -EEXIST;
1822 1822 ct = nf_ct_tuplehash_to_ctrack(h);
1823 1823 if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
1824   - spin_lock_bh(&nf_conntrack_lock);
  1824 + spin_lock_bh(&nf_conntrack_expect_lock);
1825 1825 err = ctnetlink_change_conntrack(ct, cda);
1826   - spin_unlock_bh(&nf_conntrack_lock);
  1826 + spin_unlock_bh(&nf_conntrack_expect_lock);
1827 1827 if (err == 0) {
1828 1828 nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
1829 1829 (1 << IPCT_ASSURED) |
1830 1830  
... ... @@ -2152,9 +2152,9 @@
2152 2152 if (ret < 0)
2153 2153 return ret;
2154 2154  
2155   - spin_lock_bh(&nf_conntrack_lock);
  2155 + spin_lock_bh(&nf_conntrack_expect_lock);
2156 2156 ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
2157   - spin_unlock_bh(&nf_conntrack_lock);
  2157 + spin_unlock_bh(&nf_conntrack_expect_lock);
2158 2158  
2159 2159 return ret;
2160 2160 }
2161 2161  
... ... @@ -2709,13 +2709,13 @@
2709 2709 }
2710 2710  
2711 2711 /* after list removal, usage count == 1 */
2712   - spin_lock_bh(&nf_conntrack_lock);
  2712 + spin_lock_bh(&nf_conntrack_expect_lock);
2713 2713 if (del_timer(&exp->timeout)) {
2714 2714 nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
2715 2715 nlmsg_report(nlh));
2716 2716 nf_ct_expect_put(exp);
2717 2717 }
2718   - spin_unlock_bh(&nf_conntrack_lock);
  2718 + spin_unlock_bh(&nf_conntrack_expect_lock);
2719 2719 /* have to put what we 'get' above.
2720 2720 * after this line usage count == 0 */
2721 2721 nf_ct_expect_put(exp);
... ... @@ -2724,7 +2724,7 @@
2724 2724 struct nf_conn_help *m_help;
2725 2725  
2726 2726 /* delete all expectations for this helper */
2727   - spin_lock_bh(&nf_conntrack_lock);
  2727 + spin_lock_bh(&nf_conntrack_expect_lock);
2728 2728 for (i = 0; i < nf_ct_expect_hsize; i++) {
2729 2729 hlist_for_each_entry_safe(exp, next,
2730 2730 &net->ct.expect_hash[i],
2731 2731  
... ... @@ -2739,10 +2739,10 @@
2739 2739 }
2740 2740 }
2741 2741 }
2742   - spin_unlock_bh(&nf_conntrack_lock);
  2742 + spin_unlock_bh(&nf_conntrack_expect_lock);
2743 2743 } else {
2744 2744 /* This basically means we have to flush everything*/
2745   - spin_lock_bh(&nf_conntrack_lock);
  2745 + spin_lock_bh(&nf_conntrack_expect_lock);
2746 2746 for (i = 0; i < nf_ct_expect_hsize; i++) {
2747 2747 hlist_for_each_entry_safe(exp, next,
2748 2748 &net->ct.expect_hash[i],
... ... @@ -2755,7 +2755,7 @@
2755 2755 }
2756 2756 }
2757 2757 }
2758   - spin_unlock_bh(&nf_conntrack_lock);
  2758 + spin_unlock_bh(&nf_conntrack_expect_lock);
2759 2759 }
2760 2760  
2761 2761 return 0;
2762 2762  
... ... @@ -2981,11 +2981,11 @@
2981 2981 if (err < 0)
2982 2982 return err;
2983 2983  
2984   - spin_lock_bh(&nf_conntrack_lock);
  2984 + spin_lock_bh(&nf_conntrack_expect_lock);
2985 2985 exp = __nf_ct_expect_find(net, zone, &tuple);
2986 2986  
2987 2987 if (!exp) {
2988   - spin_unlock_bh(&nf_conntrack_lock);
  2988 + spin_unlock_bh(&nf_conntrack_expect_lock);
2989 2989 err = -ENOENT;
2990 2990 if (nlh->nlmsg_flags & NLM_F_CREATE) {
2991 2991 err = ctnetlink_create_expect(net, zone, cda,
... ... @@ -2999,7 +2999,7 @@
2999 2999 err = -EEXIST;
3000 3000 if (!(nlh->nlmsg_flags & NLM_F_EXCL))
3001 3001 err = ctnetlink_change_expect(exp, cda);
3002   - spin_unlock_bh(&nf_conntrack_lock);
  3002 + spin_unlock_bh(&nf_conntrack_expect_lock);
3003 3003  
3004 3004 return err;
3005 3005 }
net/netfilter/nf_conntrack_sip.c
... ... @@ -800,7 +800,7 @@
800 800 struct hlist_node *next;
801 801 int found = 0;
802 802  
803   - spin_lock_bh(&nf_conntrack_lock);
  803 + spin_lock_bh(&nf_conntrack_expect_lock);
804 804 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
805 805 if (exp->class != SIP_EXPECT_SIGNALLING ||
806 806 !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
... ... @@ -815,7 +815,7 @@
815 815 found = 1;
816 816 break;
817 817 }
818   - spin_unlock_bh(&nf_conntrack_lock);
  818 + spin_unlock_bh(&nf_conntrack_expect_lock);
819 819 return found;
820 820 }
821 821  
... ... @@ -825,7 +825,7 @@
825 825 struct nf_conntrack_expect *exp;
826 826 struct hlist_node *next;
827 827  
828   - spin_lock_bh(&nf_conntrack_lock);
  828 + spin_lock_bh(&nf_conntrack_expect_lock);
829 829 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
830 830 if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
831 831 continue;
... ... @@ -836,7 +836,7 @@
836 836 if (!media)
837 837 break;
838 838 }
839   - spin_unlock_bh(&nf_conntrack_lock);
  839 + spin_unlock_bh(&nf_conntrack_expect_lock);
840 840 }
841 841  
842 842 static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,