Commit 64b87639c9cbeb03e26bc65528416c961b1dde96
Committed by
Pablo Neira Ayuso
1 parent
a90a6e55f3
netfilter: conntrack: fix race between nf_conntrack proc read and hash resize
When we do "cat /proc/net/nf_conntrack" and meanwhile resize the conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, a race occurs, because the reader can observe a newly allocated hash but the old size (or vice versa). An oops then happens as follows:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000017
  IP: [<ffffffffa0418e21>] seq_print_acct+0x11/0x50 [nf_conntrack]
  Call Trace:
  [<ffffffffa0412f4e>] ? ct_seq_show+0x14e/0x340 [nf_conntrack]
  [<ffffffff81261a1c>] seq_read+0x2cc/0x390
  [<ffffffff812a8d62>] proc_reg_read+0x42/0x70
  [<ffffffff8123bee7>] __vfs_read+0x37/0x130
  [<ffffffff81347980>] ? security_file_permission+0xa0/0xc0
  [<ffffffff8123cf75>] vfs_read+0x95/0x140
  [<ffffffff8123e475>] SyS_read+0x55/0xc0
  [<ffffffff817c2572>] entry_SYSCALL_64_fastpath+0x1a/0xa4

It is very easy to reproduce this kernel crash.
1. Open one shell and run the following commands:
  while : ; do
    echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize
  done
2. Open more shells and run the following commands:
  while : ; do
    cat /proc/net/nf_conntrack
  done
3. Just wait a moment; the oops will happen soon.

The solution in this patch is based on Florian's commit 5e3c61f98175 ("netfilter: conntrack: fix lookup race during hash resize"). It also adds a wrapper function, nf_conntrack_get_ht, to get the hash and hsize, as suggested by Florian Westphal.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Showing 4 changed files with 38 additions and 9 deletions Side-by-side Diff
include/net/netfilter/nf_conntrack_core.h
... | ... | @@ -51,6 +51,8 @@ |
51 | 51 | const struct nf_conntrack_l3proto *l3proto, |
52 | 52 | const struct nf_conntrack_l4proto *l4proto); |
53 | 53 | |
54 | +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); | |
55 | + | |
54 | 56 | /* Find a connection corresponding to a tuple. */ |
55 | 57 | struct nf_conntrack_tuple_hash * |
56 | 58 | nf_conntrack_find_get(struct net *net, |
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
... | ... | @@ -26,6 +26,8 @@ |
26 | 26 | |
27 | 27 | struct ct_iter_state { /* iteration state for one reader; ct_seq_start() obtains it from seq->private */ |
28 | 28 | struct seq_net_private p; |
29 | + struct hlist_nulls_head *hash; /* table pointer filled in by nf_conntrack_get_ht() from ct_seq_start() */ | |
30 | + unsigned int htable_size; /* size filled in by the same nf_conntrack_get_ht() call as hash; bounds the bucket loops */ | |
29 | 31 | unsigned int bucket; /* index into hash[] of the bucket currently being walked */ |
30 | 32 | }; |
31 | 33 | |
32 | 34 | |
... | ... | @@ -35,10 +37,10 @@ |
35 | 37 | struct hlist_nulls_node *n; |
36 | 38 | |
37 | 39 | for (st->bucket = 0; |
38 | - st->bucket < nf_conntrack_htable_size; | |
40 | + st->bucket < st->htable_size; | |
39 | 41 | st->bucket++) { |
40 | 42 | n = rcu_dereference( |
41 | - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); | |
43 | + hlist_nulls_first_rcu(&st->hash[st->bucket])); | |
42 | 44 | if (!is_a_nulls(n)) |
43 | 45 | return n; |
44 | 46 | } |
45 | 47 | |
... | ... | @@ -53,11 +55,11 @@ |
53 | 55 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
54 | 56 | while (is_a_nulls(head)) { |
55 | 57 | if (likely(get_nulls_value(head) == st->bucket)) { |
56 | - if (++st->bucket >= nf_conntrack_htable_size) | |
58 | + if (++st->bucket >= st->htable_size) | |
57 | 59 | return NULL; |
58 | 60 | } |
59 | 61 | head = rcu_dereference( |
60 | - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); | |
62 | + hlist_nulls_first_rcu(&st->hash[st->bucket])); | |
61 | 63 | } |
62 | 64 | return head; |
63 | 65 | } |
64 | 66 | |
... | ... | @@ -75,7 +77,11 @@ |
75 | 77 | static void *ct_seq_start(struct seq_file *seq, loff_t *pos) |
76 | 78 | __acquires(RCU) |
77 | 79 | { |
80 | + struct ct_iter_state *st = seq->private; | |
81 | + /* Take the RCU read lock first: nf_conntrack_get_ht() must be called with it held. The matching unlock is not visible in this hunk. */ | |
78 | 82 | rcu_read_lock(); |
83 | + /* Store one (table, size) pair in st so the bucket loops index st->hash against st->htable_size instead of the globals. */ | |
84 | + nf_conntrack_get_ht(&st->hash, &st->htable_size); | |
79 | 85 | return ct_get_idx(seq, *pos); |
80 | 86 | } |
81 | 87 |
net/netfilter/nf_conntrack_core.c
... | ... | @@ -460,6 +460,23 @@ |
460 | 460 | net_eq(net, nf_ct_net(ct)); |
461 | 461 | } |
462 | 462 | |
463 | +/* Hand back the conntrack hash table pointer and its size, read together under one nf_conntrack_generation sequence, through the hash and hsize output pointers. Must be called with rcu read lock held. */ | |
464 | +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) | |
465 | +{ | |
466 | + struct hlist_nulls_head *hptr; | |
467 | + unsigned int sequence, hsz; | |
468 | + /* Re-read both globals whenever read_seqcount_retry() reports a change, so the size and the pointer are not taken from different tables. */ | |
469 | + do { | |
470 | + sequence = read_seqcount_begin(&nf_conntrack_generation); | |
471 | + hsz = nf_conntrack_htable_size; | |
472 | + hptr = nf_conntrack_hash; | |
473 | + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); | |
474 | + /* Write the caller's outputs only after the retry loop has exited. */ | |
475 | + *hash = hptr; | |
476 | + *hsize = hsz; | |
477 | +} | |
478 | +EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); | |
479 | + | |
463 | 480 | /* |
464 | 481 | * Warning : |
465 | 482 | * - Caller must take a reference on returned object |
net/netfilter/nf_conntrack_standalone.c
... | ... | @@ -48,6 +48,8 @@ |
48 | 48 | |
49 | 49 | struct ct_iter_state { /* iteration state carried across one /proc read of the conntrack table */ |
50 | 50 | struct seq_net_private p; |
51 | + struct hlist_nulls_head *hash; /* table pointer filled in by nf_conntrack_get_ht() before ct_get_idx() runs */ | |
52 | + unsigned int htable_size; /* size filled in by the same nf_conntrack_get_ht() call as hash; bounds the bucket loops */ | |
51 | 53 | unsigned int bucket; /* index into hash[] of the bucket currently being walked */ |
52 | 54 | u_int64_t time_now; /* set from ktime_get_real_ns() just before rcu_read_lock() is taken */ |
53 | 55 | }; |
54 | 56 | |
... | ... | @@ -58,9 +60,10 @@ |
58 | 60 | struct hlist_nulls_node *n; |
59 | 61 | |
60 | 62 | for (st->bucket = 0; |
61 | - st->bucket < nf_conntrack_htable_size; | |
63 | + st->bucket < st->htable_size; | |
62 | 64 | st->bucket++) { |
63 | - n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); | |
65 | + n = rcu_dereference( | |
66 | + hlist_nulls_first_rcu(&st->hash[st->bucket])); | |
64 | 67 | if (!is_a_nulls(n)) |
65 | 68 | return n; |
66 | 69 | } |
67 | 70 | |
... | ... | @@ -75,12 +78,11 @@ |
75 | 78 | head = rcu_dereference(hlist_nulls_next_rcu(head)); |
76 | 79 | while (is_a_nulls(head)) { |
77 | 80 | if (likely(get_nulls_value(head) == st->bucket)) { |
78 | - if (++st->bucket >= nf_conntrack_htable_size) | |
81 | + if (++st->bucket >= st->htable_size) | |
79 | 82 | return NULL; |
80 | 83 | } |
81 | 84 | head = rcu_dereference( |
82 | - hlist_nulls_first_rcu( | |
83 | - &nf_conntrack_hash[st->bucket])); | |
85 | + hlist_nulls_first_rcu(&st->hash[st->bucket])); | |
84 | 86 | } |
85 | 87 | return head; |
86 | 88 | } |
... | ... | @@ -102,6 +104,8 @@ |
102 | 104 | |
103 | 105 | st->time_now = ktime_get_real_ns(); |
104 | 106 | rcu_read_lock(); |
107 | + | |
108 | + nf_conntrack_get_ht(&st->hash, &st->htable_size); | |
105 | 109 | return ct_get_idx(seq, *pos); |
106 | 110 | } |
107 | 111 |