Commit 64b87639c9cbeb03e26bc65528416c961b1dde96

Authored by Liping Zhang
Committed by Pablo Neira Ayuso
1 parent a90a6e55f3

netfilter: conntrack: fix race between nf_conntrack proc read and hash resize

When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack
hash table via /sys/module/nf_conntrack/parameters/hashsize, a race will
happen, because the reader can observe a newly allocated hash but the old
size (or vice versa). So an oops will happen as follows:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000017
  IP: [<ffffffffa0418e21>] seq_print_acct+0x11/0x50 [nf_conntrack]
  Call Trace:
  [<ffffffffa0412f4e>] ? ct_seq_show+0x14e/0x340 [nf_conntrack]
  [<ffffffff81261a1c>] seq_read+0x2cc/0x390
  [<ffffffff812a8d62>] proc_reg_read+0x42/0x70
  [<ffffffff8123bee7>] __vfs_read+0x37/0x130
  [<ffffffff81347980>] ? security_file_permission+0xa0/0xc0
  [<ffffffff8123cf75>] vfs_read+0x95/0x140
  [<ffffffff8123e475>] SyS_read+0x55/0xc0
  [<ffffffff817c2572>] entry_SYSCALL_64_fastpath+0x1a/0xa4

It is very easy to reproduce this kernel crash.
1. open one shell and input the following cmds:
  while : ; do
    echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize
  done
2. open more shells and input the following cmds:
  while : ; do
    cat /proc/net/nf_conntrack
  done
3. just wait a moment, oops will happen soon.

The solution in this patch is based on Florian's Commit 5e3c61f98175
("netfilter: conntrack: fix lookup race during hash resize"). And
add a wrapper function nf_conntrack_get_ht to get hash and hsize
suggested by Florian Westphal.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Showing 4 changed files with 38 additions and 9 deletions Side-by-side Diff

include/net/netfilter/nf_conntrack_core.h
... ... @@ -51,6 +51,8 @@
51 51 const struct nf_conntrack_l3proto *l3proto,
52 52 const struct nf_conntrack_l4proto *l4proto);
53 53  
  54 +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
  55 +
54 56 /* Find a connection corresponding to a tuple. */
55 57 struct nf_conntrack_tuple_hash *
56 58 nf_conntrack_find_get(struct net *net,
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
... ... @@ -26,6 +26,8 @@
26 26  
27 27 struct ct_iter_state {
28 28 struct seq_net_private p;
  29 + struct hlist_nulls_head *hash;
  30 + unsigned int htable_size;
29 31 unsigned int bucket;
30 32 };
31 33  
32 34  
... ... @@ -35,10 +37,10 @@
35 37 struct hlist_nulls_node *n;
36 38  
37 39 for (st->bucket = 0;
38   - st->bucket < nf_conntrack_htable_size;
  40 + st->bucket < st->htable_size;
39 41 st->bucket++) {
40 42 n = rcu_dereference(
41   - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
  43 + hlist_nulls_first_rcu(&st->hash[st->bucket]));
42 44 if (!is_a_nulls(n))
43 45 return n;
44 46 }
45 47  
... ... @@ -53,11 +55,11 @@
53 55 head = rcu_dereference(hlist_nulls_next_rcu(head));
54 56 while (is_a_nulls(head)) {
55 57 if (likely(get_nulls_value(head) == st->bucket)) {
56   - if (++st->bucket >= nf_conntrack_htable_size)
  58 + if (++st->bucket >= st->htable_size)
57 59 return NULL;
58 60 }
59 61 head = rcu_dereference(
60   - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
  62 + hlist_nulls_first_rcu(&st->hash[st->bucket]));
61 63 }
62 64 return head;
63 65 }
64 66  
... ... @@ -75,7 +77,11 @@
75 77 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
76 78 __acquires(RCU)
77 79 {
  80 + struct ct_iter_state *st = seq->private;
  81 +
78 82 rcu_read_lock();
  83 +
  84 + nf_conntrack_get_ht(&st->hash, &st->htable_size);
79 85 return ct_get_idx(seq, *pos);
80 86 }
81 87  
net/netfilter/nf_conntrack_core.c
... ... @@ -460,6 +460,23 @@
460 460 net_eq(net, nf_ct_net(ct));
461 461 }
462 462  
  463 +/* must be called with rcu read lock held */
  464 +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
  465 +{
  466 + struct hlist_nulls_head *hptr;
  467 + unsigned int sequence, hsz;
  468 +
  469 + do {
  470 + sequence = read_seqcount_begin(&nf_conntrack_generation);
  471 + hsz = nf_conntrack_htable_size;
  472 + hptr = nf_conntrack_hash;
  473 + } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
  474 +
  475 + *hash = hptr;
  476 + *hsize = hsz;
  477 +}
  478 +EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
  479 +
463 480 /*
464 481 * Warning :
465 482 * - Caller must take a reference on returned object
net/netfilter/nf_conntrack_standalone.c
... ... @@ -48,6 +48,8 @@
48 48  
49 49 struct ct_iter_state {
50 50 struct seq_net_private p;
  51 + struct hlist_nulls_head *hash;
  52 + unsigned int htable_size;
51 53 unsigned int bucket;
52 54 u_int64_t time_now;
53 55 };
54 56  
... ... @@ -58,9 +60,10 @@
58 60 struct hlist_nulls_node *n;
59 61  
60 62 for (st->bucket = 0;
61   - st->bucket < nf_conntrack_htable_size;
  63 + st->bucket < st->htable_size;
62 64 st->bucket++) {
63   - n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
  65 + n = rcu_dereference(
  66 + hlist_nulls_first_rcu(&st->hash[st->bucket]));
64 67 if (!is_a_nulls(n))
65 68 return n;
66 69 }
67 70  
... ... @@ -75,12 +78,11 @@
75 78 head = rcu_dereference(hlist_nulls_next_rcu(head));
76 79 while (is_a_nulls(head)) {
77 80 if (likely(get_nulls_value(head) == st->bucket)) {
78   - if (++st->bucket >= nf_conntrack_htable_size)
  81 + if (++st->bucket >= st->htable_size)
79 82 return NULL;
80 83 }
81 84 head = rcu_dereference(
82   - hlist_nulls_first_rcu(
83   - &nf_conntrack_hash[st->bucket]));
  85 + hlist_nulls_first_rcu(&st->hash[st->bucket]));
84 86 }
85 87 return head;
86 88 }
... ... @@ -102,6 +104,8 @@
102 104  
103 105 st->time_now = ktime_get_real_ns();
104 106 rcu_read_lock();
  107 +
  108 + nf_conntrack_get_ht(&st->hash, &st->htable_size);
105 109 return ct_get_idx(seq, *pos);
106 110 }
107 111