Commit 1e47ee8367babe6a5e8adf44a714c7086657b87e

Authored by Pablo Neira Ayuso
1 parent 4610476d89

netfilter: nf_conntrack: fix BUG_ON while removing nf_conntrack with netns

canqun zhang reported that we're hitting BUG_ON in the
nf_conntrack_destroy path when calling kfree_skb while
rmmod'ing the nf_conntrack module.

Currently, the nf_ct_destroy hook is being set to NULL in the
destroy path of conntrack.init_net. However, this is a problem
since init_net may be destroyed before any other existing netns
(we cannot assume any specific ordering while releasing existing
netns according to what I read in recent emails).

Thanks to Gao feng for initial patch to address this issue.

Reported-by: canqun zhang <canqunzhang@gmail.com>
Acked-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Showing 3 changed files with 8 additions and 4 deletions Inline Diff

include/net/netfilter/nf_conntrack_core.h
1 /* 1 /*
2 * This header is used to share core functionality between the 2 * This header is used to share core functionality between the
3 * standalone connection tracking module, and the compatibility layer's use 3 * standalone connection tracking module, and the compatibility layer's use
4 * of connection tracking. 4 * of connection tracking.
5 * 5 *
6 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 6 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
7 * - generalize L3 protocol dependent part. 7 * - generalize L3 protocol dependent part.
8 * 8 *
9 * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h 9 * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h
10 */ 10 */
11 11
12 #ifndef _NF_CONNTRACK_CORE_H 12 #ifndef _NF_CONNTRACK_CORE_H
13 #define _NF_CONNTRACK_CORE_H 13 #define _NF_CONNTRACK_CORE_H
14 14
15 #include <linux/netfilter.h> 15 #include <linux/netfilter.h>
16 #include <net/netfilter/nf_conntrack_l3proto.h> 16 #include <net/netfilter/nf_conntrack_l3proto.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h> 17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_ecache.h> 18 #include <net/netfilter/nf_conntrack_ecache.h>
19 19
20 /* This header is used to share core functionality between the 20 /* This header is used to share core functionality between the
21 standalone connection tracking module, and the compatibility layer's use 21 standalone connection tracking module, and the compatibility layer's use
22 of connection tracking. */ 22 of connection tracking. */
23 extern unsigned int nf_conntrack_in(struct net *net, 23 extern unsigned int nf_conntrack_in(struct net *net,
24 u_int8_t pf, 24 u_int8_t pf,
25 unsigned int hooknum, 25 unsigned int hooknum,
26 struct sk_buff *skb); 26 struct sk_buff *skb);
27 27
28 extern int nf_conntrack_init(struct net *net); 28 extern int nf_conntrack_init(struct net *net);
29 extern void nf_conntrack_cleanup(struct net *net); 29 extern void nf_conntrack_cleanup(struct net *net);
30 30
31 extern int nf_conntrack_proto_init(struct net *net); 31 extern int nf_conntrack_proto_init(struct net *net);
32 extern void nf_conntrack_proto_fini(struct net *net); 32 extern void nf_conntrack_proto_fini(struct net *net);
33 33
34 extern void nf_conntrack_cleanup_end(void);
35
34 extern bool 36 extern bool
35 nf_ct_get_tuple(const struct sk_buff *skb, 37 nf_ct_get_tuple(const struct sk_buff *skb,
36 unsigned int nhoff, 38 unsigned int nhoff,
37 unsigned int dataoff, 39 unsigned int dataoff,
38 u_int16_t l3num, 40 u_int16_t l3num,
39 u_int8_t protonum, 41 u_int8_t protonum,
40 struct nf_conntrack_tuple *tuple, 42 struct nf_conntrack_tuple *tuple,
41 const struct nf_conntrack_l3proto *l3proto, 43 const struct nf_conntrack_l3proto *l3proto,
42 const struct nf_conntrack_l4proto *l4proto); 44 const struct nf_conntrack_l4proto *l4proto);
43 45
44 extern bool 46 extern bool
45 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 47 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
46 const struct nf_conntrack_tuple *orig, 48 const struct nf_conntrack_tuple *orig,
47 const struct nf_conntrack_l3proto *l3proto, 49 const struct nf_conntrack_l3proto *l3proto,
48 const struct nf_conntrack_l4proto *l4proto); 50 const struct nf_conntrack_l4proto *l4proto);
49 51
50 /* Find a connection corresponding to a tuple. */ 52 /* Find a connection corresponding to a tuple. */
51 extern struct nf_conntrack_tuple_hash * 53 extern struct nf_conntrack_tuple_hash *
52 nf_conntrack_find_get(struct net *net, u16 zone, 54 nf_conntrack_find_get(struct net *net, u16 zone,
53 const struct nf_conntrack_tuple *tuple); 55 const struct nf_conntrack_tuple *tuple);
54 56
55 extern int __nf_conntrack_confirm(struct sk_buff *skb); 57 extern int __nf_conntrack_confirm(struct sk_buff *skb);
56 58
57 /* Confirm a connection: returns NF_DROP if packet must be dropped. */ 59 /* Confirm a connection: returns NF_DROP if packet must be dropped. */
58 static inline int nf_conntrack_confirm(struct sk_buff *skb) 60 static inline int nf_conntrack_confirm(struct sk_buff *skb)
59 { 61 {
60 struct nf_conn *ct = (struct nf_conn *)skb->nfct; 62 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
61 int ret = NF_ACCEPT; 63 int ret = NF_ACCEPT;
62 64
63 if (ct && !nf_ct_is_untracked(ct)) { 65 if (ct && !nf_ct_is_untracked(ct)) {
64 if (!nf_ct_is_confirmed(ct)) 66 if (!nf_ct_is_confirmed(ct))
65 ret = __nf_conntrack_confirm(skb); 67 ret = __nf_conntrack_confirm(skb);
66 if (likely(ret == NF_ACCEPT)) 68 if (likely(ret == NF_ACCEPT))
67 nf_ct_deliver_cached_events(ct); 69 nf_ct_deliver_cached_events(ct);
68 } 70 }
69 return ret; 71 return ret;
70 } 72 }
71 73
72 int 74 int
73 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 75 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
74 const struct nf_conntrack_l3proto *l3proto, 76 const struct nf_conntrack_l3proto *l3proto,
75 const struct nf_conntrack_l4proto *proto); 77 const struct nf_conntrack_l4proto *proto);
76 78
77 extern spinlock_t nf_conntrack_lock ; 79 extern spinlock_t nf_conntrack_lock ;
78 80
79 #endif /* _NF_CONNTRACK_CORE_H */ 81 #endif /* _NF_CONNTRACK_CORE_H */
80 82
net/netfilter/nf_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from, 1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables 2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */ 3 extension. */
4 4
5 /* (C) 1999-2001 Paul `Rusty' Russell 5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13 13
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #include <linux/netfilter.h> 15 #include <linux/netfilter.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/sched.h> 17 #include <linux/sched.h>
18 #include <linux/skbuff.h> 18 #include <linux/skbuff.h>
19 #include <linux/proc_fs.h> 19 #include <linux/proc_fs.h>
20 #include <linux/vmalloc.h> 20 #include <linux/vmalloc.h>
21 #include <linux/stddef.h> 21 #include <linux/stddef.h>
22 #include <linux/slab.h> 22 #include <linux/slab.h>
23 #include <linux/random.h> 23 #include <linux/random.h>
24 #include <linux/jhash.h> 24 #include <linux/jhash.h>
25 #include <linux/err.h> 25 #include <linux/err.h>
26 #include <linux/percpu.h> 26 #include <linux/percpu.h>
27 #include <linux/moduleparam.h> 27 #include <linux/moduleparam.h>
28 #include <linux/notifier.h> 28 #include <linux/notifier.h>
29 #include <linux/kernel.h> 29 #include <linux/kernel.h>
30 #include <linux/netdevice.h> 30 #include <linux/netdevice.h>
31 #include <linux/socket.h> 31 #include <linux/socket.h>
32 #include <linux/mm.h> 32 #include <linux/mm.h>
33 #include <linux/nsproxy.h> 33 #include <linux/nsproxy.h>
34 #include <linux/rculist_nulls.h> 34 #include <linux/rculist_nulls.h>
35 35
36 #include <net/netfilter/nf_conntrack.h> 36 #include <net/netfilter/nf_conntrack.h>
37 #include <net/netfilter/nf_conntrack_l3proto.h> 37 #include <net/netfilter/nf_conntrack_l3proto.h>
38 #include <net/netfilter/nf_conntrack_l4proto.h> 38 #include <net/netfilter/nf_conntrack_l4proto.h>
39 #include <net/netfilter/nf_conntrack_expect.h> 39 #include <net/netfilter/nf_conntrack_expect.h>
40 #include <net/netfilter/nf_conntrack_helper.h> 40 #include <net/netfilter/nf_conntrack_helper.h>
41 #include <net/netfilter/nf_conntrack_core.h> 41 #include <net/netfilter/nf_conntrack_core.h>
42 #include <net/netfilter/nf_conntrack_extend.h> 42 #include <net/netfilter/nf_conntrack_extend.h>
43 #include <net/netfilter/nf_conntrack_acct.h> 43 #include <net/netfilter/nf_conntrack_acct.h>
44 #include <net/netfilter/nf_conntrack_ecache.h> 44 #include <net/netfilter/nf_conntrack_ecache.h>
45 #include <net/netfilter/nf_conntrack_zones.h> 45 #include <net/netfilter/nf_conntrack_zones.h>
46 #include <net/netfilter/nf_conntrack_timestamp.h> 46 #include <net/netfilter/nf_conntrack_timestamp.h>
47 #include <net/netfilter/nf_conntrack_timeout.h> 47 #include <net/netfilter/nf_conntrack_timeout.h>
48 #include <net/netfilter/nf_nat.h> 48 #include <net/netfilter/nf_nat.h>
49 #include <net/netfilter/nf_nat_core.h> 49 #include <net/netfilter/nf_nat_core.h>
50 50
51 #define NF_CONNTRACK_VERSION "0.5.0" 51 #define NF_CONNTRACK_VERSION "0.5.0"
52 52
53 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, 53 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
54 enum nf_nat_manip_type manip, 54 enum nf_nat_manip_type manip,
55 const struct nlattr *attr) __read_mostly; 55 const struct nlattr *attr) __read_mostly;
56 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); 56 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
57 57
58 int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, 58 int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
59 struct nf_conn *ct, 59 struct nf_conn *ct,
60 enum ip_conntrack_info ctinfo, 60 enum ip_conntrack_info ctinfo,
61 unsigned int protoff); 61 unsigned int protoff);
62 EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); 62 EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
63 63
64 DEFINE_SPINLOCK(nf_conntrack_lock); 64 DEFINE_SPINLOCK(nf_conntrack_lock);
65 EXPORT_SYMBOL_GPL(nf_conntrack_lock); 65 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
66 66
67 unsigned int nf_conntrack_htable_size __read_mostly; 67 unsigned int nf_conntrack_htable_size __read_mostly;
68 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); 68 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
69 69
70 unsigned int nf_conntrack_max __read_mostly; 70 unsigned int nf_conntrack_max __read_mostly;
71 EXPORT_SYMBOL_GPL(nf_conntrack_max); 71 EXPORT_SYMBOL_GPL(nf_conntrack_max);
72 72
73 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 73 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
74 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 74 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
75 75
76 unsigned int nf_conntrack_hash_rnd __read_mostly; 76 unsigned int nf_conntrack_hash_rnd __read_mostly;
77 EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); 77 EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
78 78
79 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) 79 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
80 { 80 {
81 unsigned int n; 81 unsigned int n;
82 82
83 /* The direction must be ignored, so we hash everything up to the 83 /* The direction must be ignored, so we hash everything up to the
84 * destination ports (which is a multiple of 4) and treat the last 84 * destination ports (which is a multiple of 4) and treat the last
85 * three bytes manually. 85 * three bytes manually.
86 */ 86 */
87 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 87 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
88 return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^ 88 return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
89 (((__force __u16)tuple->dst.u.all << 16) | 89 (((__force __u16)tuple->dst.u.all << 16) |
90 tuple->dst.protonum)); 90 tuple->dst.protonum));
91 } 91 }
92 92
93 static u32 __hash_bucket(u32 hash, unsigned int size) 93 static u32 __hash_bucket(u32 hash, unsigned int size)
94 { 94 {
95 return ((u64)hash * size) >> 32; 95 return ((u64)hash * size) >> 32;
96 } 96 }
97 97
98 static u32 hash_bucket(u32 hash, const struct net *net) 98 static u32 hash_bucket(u32 hash, const struct net *net)
99 { 99 {
100 return __hash_bucket(hash, net->ct.htable_size); 100 return __hash_bucket(hash, net->ct.htable_size);
101 } 101 }
102 102
103 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 103 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
104 u16 zone, unsigned int size) 104 u16 zone, unsigned int size)
105 { 105 {
106 return __hash_bucket(hash_conntrack_raw(tuple, zone), size); 106 return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
107 } 107 }
108 108
109 static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, 109 static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
110 const struct nf_conntrack_tuple *tuple) 110 const struct nf_conntrack_tuple *tuple)
111 { 111 {
112 return __hash_conntrack(tuple, zone, net->ct.htable_size); 112 return __hash_conntrack(tuple, zone, net->ct.htable_size);
113 } 113 }
114 114
115 bool 115 bool
116 nf_ct_get_tuple(const struct sk_buff *skb, 116 nf_ct_get_tuple(const struct sk_buff *skb,
117 unsigned int nhoff, 117 unsigned int nhoff,
118 unsigned int dataoff, 118 unsigned int dataoff,
119 u_int16_t l3num, 119 u_int16_t l3num,
120 u_int8_t protonum, 120 u_int8_t protonum,
121 struct nf_conntrack_tuple *tuple, 121 struct nf_conntrack_tuple *tuple,
122 const struct nf_conntrack_l3proto *l3proto, 122 const struct nf_conntrack_l3proto *l3proto,
123 const struct nf_conntrack_l4proto *l4proto) 123 const struct nf_conntrack_l4proto *l4proto)
124 { 124 {
125 memset(tuple, 0, sizeof(*tuple)); 125 memset(tuple, 0, sizeof(*tuple));
126 126
127 tuple->src.l3num = l3num; 127 tuple->src.l3num = l3num;
128 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) 128 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
129 return false; 129 return false;
130 130
131 tuple->dst.protonum = protonum; 131 tuple->dst.protonum = protonum;
132 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 132 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
133 133
134 return l4proto->pkt_to_tuple(skb, dataoff, tuple); 134 return l4proto->pkt_to_tuple(skb, dataoff, tuple);
135 } 135 }
136 EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 136 EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
137 137
138 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 138 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
139 u_int16_t l3num, struct nf_conntrack_tuple *tuple) 139 u_int16_t l3num, struct nf_conntrack_tuple *tuple)
140 { 140 {
141 struct nf_conntrack_l3proto *l3proto; 141 struct nf_conntrack_l3proto *l3proto;
142 struct nf_conntrack_l4proto *l4proto; 142 struct nf_conntrack_l4proto *l4proto;
143 unsigned int protoff; 143 unsigned int protoff;
144 u_int8_t protonum; 144 u_int8_t protonum;
145 int ret; 145 int ret;
146 146
147 rcu_read_lock(); 147 rcu_read_lock();
148 148
149 l3proto = __nf_ct_l3proto_find(l3num); 149 l3proto = __nf_ct_l3proto_find(l3num);
150 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); 150 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
151 if (ret != NF_ACCEPT) { 151 if (ret != NF_ACCEPT) {
152 rcu_read_unlock(); 152 rcu_read_unlock();
153 return false; 153 return false;
154 } 154 }
155 155
156 l4proto = __nf_ct_l4proto_find(l3num, protonum); 156 l4proto = __nf_ct_l4proto_find(l3num, protonum);
157 157
158 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, 158 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
159 l3proto, l4proto); 159 l3proto, l4proto);
160 160
161 rcu_read_unlock(); 161 rcu_read_unlock();
162 return ret; 162 return ret;
163 } 163 }
164 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); 164 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
165 165
166 bool 166 bool
167 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 167 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
168 const struct nf_conntrack_tuple *orig, 168 const struct nf_conntrack_tuple *orig,
169 const struct nf_conntrack_l3proto *l3proto, 169 const struct nf_conntrack_l3proto *l3proto,
170 const struct nf_conntrack_l4proto *l4proto) 170 const struct nf_conntrack_l4proto *l4proto)
171 { 171 {
172 memset(inverse, 0, sizeof(*inverse)); 172 memset(inverse, 0, sizeof(*inverse));
173 173
174 inverse->src.l3num = orig->src.l3num; 174 inverse->src.l3num = orig->src.l3num;
175 if (l3proto->invert_tuple(inverse, orig) == 0) 175 if (l3proto->invert_tuple(inverse, orig) == 0)
176 return false; 176 return false;
177 177
178 inverse->dst.dir = !orig->dst.dir; 178 inverse->dst.dir = !orig->dst.dir;
179 179
180 inverse->dst.protonum = orig->dst.protonum; 180 inverse->dst.protonum = orig->dst.protonum;
181 return l4proto->invert_tuple(inverse, orig); 181 return l4proto->invert_tuple(inverse, orig);
182 } 182 }
183 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); 183 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
184 184
185 static void 185 static void
186 clean_from_lists(struct nf_conn *ct) 186 clean_from_lists(struct nf_conn *ct)
187 { 187 {
188 pr_debug("clean_from_lists(%p)\n", ct); 188 pr_debug("clean_from_lists(%p)\n", ct);
189 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 189 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
190 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); 190 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
191 191
192 /* Destroy all pending expectations */ 192 /* Destroy all pending expectations */
193 nf_ct_remove_expectations(ct); 193 nf_ct_remove_expectations(ct);
194 } 194 }
195 195
196 static void 196 static void
197 destroy_conntrack(struct nf_conntrack *nfct) 197 destroy_conntrack(struct nf_conntrack *nfct)
198 { 198 {
199 struct nf_conn *ct = (struct nf_conn *)nfct; 199 struct nf_conn *ct = (struct nf_conn *)nfct;
200 struct net *net = nf_ct_net(ct); 200 struct net *net = nf_ct_net(ct);
201 struct nf_conntrack_l4proto *l4proto; 201 struct nf_conntrack_l4proto *l4proto;
202 202
203 pr_debug("destroy_conntrack(%p)\n", ct); 203 pr_debug("destroy_conntrack(%p)\n", ct);
204 NF_CT_ASSERT(atomic_read(&nfct->use) == 0); 204 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
205 NF_CT_ASSERT(!timer_pending(&ct->timeout)); 205 NF_CT_ASSERT(!timer_pending(&ct->timeout));
206 206
207 /* To make sure we don't get any weird locking issues here: 207 /* To make sure we don't get any weird locking issues here:
208 * destroy_conntrack() MUST NOT be called with a write lock 208 * destroy_conntrack() MUST NOT be called with a write lock
209 * to nf_conntrack_lock!!! -HW */ 209 * to nf_conntrack_lock!!! -HW */
210 rcu_read_lock(); 210 rcu_read_lock();
211 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 211 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
212 if (l4proto && l4proto->destroy) 212 if (l4proto && l4proto->destroy)
213 l4proto->destroy(ct); 213 l4proto->destroy(ct);
214 214
215 rcu_read_unlock(); 215 rcu_read_unlock();
216 216
217 spin_lock_bh(&nf_conntrack_lock); 217 spin_lock_bh(&nf_conntrack_lock);
218 /* Expectations will have been removed in clean_from_lists, 218 /* Expectations will have been removed in clean_from_lists,
219 * except TFTP can create an expectation on the first packet, 219 * except TFTP can create an expectation on the first packet,
220 * before connection is in the list, so we need to clean here, 220 * before connection is in the list, so we need to clean here,
221 * too. */ 221 * too. */
222 nf_ct_remove_expectations(ct); 222 nf_ct_remove_expectations(ct);
223 223
224 /* We overload first tuple to link into unconfirmed or dying list.*/ 224 /* We overload first tuple to link into unconfirmed or dying list.*/
225 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); 225 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
226 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 226 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
227 227
228 NF_CT_STAT_INC(net, delete); 228 NF_CT_STAT_INC(net, delete);
229 spin_unlock_bh(&nf_conntrack_lock); 229 spin_unlock_bh(&nf_conntrack_lock);
230 230
231 if (ct->master) 231 if (ct->master)
232 nf_ct_put(ct->master); 232 nf_ct_put(ct->master);
233 233
234 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); 234 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
235 nf_conntrack_free(ct); 235 nf_conntrack_free(ct);
236 } 236 }
237 237
238 void nf_ct_delete_from_lists(struct nf_conn *ct) 238 void nf_ct_delete_from_lists(struct nf_conn *ct)
239 { 239 {
240 struct net *net = nf_ct_net(ct); 240 struct net *net = nf_ct_net(ct);
241 241
242 nf_ct_helper_destroy(ct); 242 nf_ct_helper_destroy(ct);
243 spin_lock_bh(&nf_conntrack_lock); 243 spin_lock_bh(&nf_conntrack_lock);
244 /* Inside lock so preempt is disabled on module removal path. 244 /* Inside lock so preempt is disabled on module removal path.
245 * Otherwise we can get spurious warnings. */ 245 * Otherwise we can get spurious warnings. */
246 NF_CT_STAT_INC(net, delete_list); 246 NF_CT_STAT_INC(net, delete_list);
247 clean_from_lists(ct); 247 clean_from_lists(ct);
248 /* add this conntrack to the dying list */ 248 /* add this conntrack to the dying list */
249 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 249 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
250 &net->ct.dying); 250 &net->ct.dying);
251 spin_unlock_bh(&nf_conntrack_lock); 251 spin_unlock_bh(&nf_conntrack_lock);
252 } 252 }
253 EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); 253 EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
254 254
255 static void death_by_event(unsigned long ul_conntrack) 255 static void death_by_event(unsigned long ul_conntrack)
256 { 256 {
257 struct nf_conn *ct = (void *)ul_conntrack; 257 struct nf_conn *ct = (void *)ul_conntrack;
258 struct net *net = nf_ct_net(ct); 258 struct net *net = nf_ct_net(ct);
259 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); 259 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
260 260
261 BUG_ON(ecache == NULL); 261 BUG_ON(ecache == NULL);
262 262
263 if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { 263 if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
264 /* bad luck, let's retry again */ 264 /* bad luck, let's retry again */
265 ecache->timeout.expires = jiffies + 265 ecache->timeout.expires = jiffies +
266 (random32() % net->ct.sysctl_events_retry_timeout); 266 (random32() % net->ct.sysctl_events_retry_timeout);
267 add_timer(&ecache->timeout); 267 add_timer(&ecache->timeout);
268 return; 268 return;
269 } 269 }
270 /* we've got the event delivered, now it's dying */ 270 /* we've got the event delivered, now it's dying */
271 set_bit(IPS_DYING_BIT, &ct->status); 271 set_bit(IPS_DYING_BIT, &ct->status);
272 nf_ct_put(ct); 272 nf_ct_put(ct);
273 } 273 }
274 274
275 void nf_ct_dying_timeout(struct nf_conn *ct) 275 void nf_ct_dying_timeout(struct nf_conn *ct)
276 { 276 {
277 struct net *net = nf_ct_net(ct); 277 struct net *net = nf_ct_net(ct);
278 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); 278 struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
279 279
280 BUG_ON(ecache == NULL); 280 BUG_ON(ecache == NULL);
281 281
282 /* set a new timer to retry event delivery */ 282 /* set a new timer to retry event delivery */
283 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); 283 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
284 ecache->timeout.expires = jiffies + 284 ecache->timeout.expires = jiffies +
285 (random32() % net->ct.sysctl_events_retry_timeout); 285 (random32() % net->ct.sysctl_events_retry_timeout);
286 add_timer(&ecache->timeout); 286 add_timer(&ecache->timeout);
287 } 287 }
288 EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); 288 EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
289 289
290 static void death_by_timeout(unsigned long ul_conntrack) 290 static void death_by_timeout(unsigned long ul_conntrack)
291 { 291 {
292 struct nf_conn *ct = (void *)ul_conntrack; 292 struct nf_conn *ct = (void *)ul_conntrack;
293 struct nf_conn_tstamp *tstamp; 293 struct nf_conn_tstamp *tstamp;
294 294
295 tstamp = nf_conn_tstamp_find(ct); 295 tstamp = nf_conn_tstamp_find(ct);
296 if (tstamp && tstamp->stop == 0) 296 if (tstamp && tstamp->stop == 0)
297 tstamp->stop = ktime_to_ns(ktime_get_real()); 297 tstamp->stop = ktime_to_ns(ktime_get_real());
298 298
299 if (!test_bit(IPS_DYING_BIT, &ct->status) && 299 if (!test_bit(IPS_DYING_BIT, &ct->status) &&
300 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { 300 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
301 /* destroy event was not delivered */ 301 /* destroy event was not delivered */
302 nf_ct_delete_from_lists(ct); 302 nf_ct_delete_from_lists(ct);
303 nf_ct_dying_timeout(ct); 303 nf_ct_dying_timeout(ct);
304 return; 304 return;
305 } 305 }
306 set_bit(IPS_DYING_BIT, &ct->status); 306 set_bit(IPS_DYING_BIT, &ct->status);
307 nf_ct_delete_from_lists(ct); 307 nf_ct_delete_from_lists(ct);
308 nf_ct_put(ct); 308 nf_ct_put(ct);
309 } 309 }
310 310
311 /* 311 /*
312 * Warning : 312 * Warning :
313 * - Caller must take a reference on returned object 313 * - Caller must take a reference on returned object
314 * and recheck nf_ct_tuple_equal(tuple, &h->tuple) 314 * and recheck nf_ct_tuple_equal(tuple, &h->tuple)
315 * OR 315 * OR
316 * - Caller must lock nf_conntrack_lock before calling this function 316 * - Caller must lock nf_conntrack_lock before calling this function
317 */ 317 */
318 static struct nf_conntrack_tuple_hash * 318 static struct nf_conntrack_tuple_hash *
319 ____nf_conntrack_find(struct net *net, u16 zone, 319 ____nf_conntrack_find(struct net *net, u16 zone,
320 const struct nf_conntrack_tuple *tuple, u32 hash) 320 const struct nf_conntrack_tuple *tuple, u32 hash)
321 { 321 {
322 struct nf_conntrack_tuple_hash *h; 322 struct nf_conntrack_tuple_hash *h;
323 struct hlist_nulls_node *n; 323 struct hlist_nulls_node *n;
324 unsigned int bucket = hash_bucket(hash, net); 324 unsigned int bucket = hash_bucket(hash, net);
325 325
326 /* Disable BHs the entire time since we normally need to disable them 326 /* Disable BHs the entire time since we normally need to disable them
327 * at least once for the stats anyway. 327 * at least once for the stats anyway.
328 */ 328 */
329 local_bh_disable(); 329 local_bh_disable();
330 begin: 330 begin:
331 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { 331 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
332 if (nf_ct_tuple_equal(tuple, &h->tuple) && 332 if (nf_ct_tuple_equal(tuple, &h->tuple) &&
333 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { 333 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
334 NF_CT_STAT_INC(net, found); 334 NF_CT_STAT_INC(net, found);
335 local_bh_enable(); 335 local_bh_enable();
336 return h; 336 return h;
337 } 337 }
338 NF_CT_STAT_INC(net, searched); 338 NF_CT_STAT_INC(net, searched);
339 } 339 }
340 /* 340 /*
341 * if the nulls value we got at the end of this lookup is 341 * if the nulls value we got at the end of this lookup is
342 * not the expected one, we must restart lookup. 342 * not the expected one, we must restart lookup.
343 * We probably met an item that was moved to another chain. 343 * We probably met an item that was moved to another chain.
344 */ 344 */
345 if (get_nulls_value(n) != bucket) { 345 if (get_nulls_value(n) != bucket) {
346 NF_CT_STAT_INC(net, search_restart); 346 NF_CT_STAT_INC(net, search_restart);
347 goto begin; 347 goto begin;
348 } 348 }
349 local_bh_enable(); 349 local_bh_enable();
350 350
351 return NULL; 351 return NULL;
352 } 352 }
353 353
354 struct nf_conntrack_tuple_hash * 354 struct nf_conntrack_tuple_hash *
355 __nf_conntrack_find(struct net *net, u16 zone, 355 __nf_conntrack_find(struct net *net, u16 zone,
356 const struct nf_conntrack_tuple *tuple) 356 const struct nf_conntrack_tuple *tuple)
357 { 357 {
358 return ____nf_conntrack_find(net, zone, tuple, 358 return ____nf_conntrack_find(net, zone, tuple,
359 hash_conntrack_raw(tuple, zone)); 359 hash_conntrack_raw(tuple, zone));
360 } 360 }
361 EXPORT_SYMBOL_GPL(__nf_conntrack_find); 361 EXPORT_SYMBOL_GPL(__nf_conntrack_find);
362 362
363 /* Find a connection corresponding to a tuple. */ 363 /* Find a connection corresponding to a tuple. */
364 static struct nf_conntrack_tuple_hash * 364 static struct nf_conntrack_tuple_hash *
365 __nf_conntrack_find_get(struct net *net, u16 zone, 365 __nf_conntrack_find_get(struct net *net, u16 zone,
366 const struct nf_conntrack_tuple *tuple, u32 hash) 366 const struct nf_conntrack_tuple *tuple, u32 hash)
367 { 367 {
368 struct nf_conntrack_tuple_hash *h; 368 struct nf_conntrack_tuple_hash *h;
369 struct nf_conn *ct; 369 struct nf_conn *ct;
370 370
371 rcu_read_lock(); 371 rcu_read_lock();
372 begin: 372 begin:
373 h = ____nf_conntrack_find(net, zone, tuple, hash); 373 h = ____nf_conntrack_find(net, zone, tuple, hash);
374 if (h) { 374 if (h) {
375 ct = nf_ct_tuplehash_to_ctrack(h); 375 ct = nf_ct_tuplehash_to_ctrack(h);
376 if (unlikely(nf_ct_is_dying(ct) || 376 if (unlikely(nf_ct_is_dying(ct) ||
377 !atomic_inc_not_zero(&ct->ct_general.use))) 377 !atomic_inc_not_zero(&ct->ct_general.use)))
378 h = NULL; 378 h = NULL;
379 else { 379 else {
380 if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || 380 if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
381 nf_ct_zone(ct) != zone)) { 381 nf_ct_zone(ct) != zone)) {
382 nf_ct_put(ct); 382 nf_ct_put(ct);
383 goto begin; 383 goto begin;
384 } 384 }
385 } 385 }
386 } 386 }
387 rcu_read_unlock(); 387 rcu_read_unlock();
388 388
389 return h; 389 return h;
390 } 390 }
391 391
392 struct nf_conntrack_tuple_hash * 392 struct nf_conntrack_tuple_hash *
393 nf_conntrack_find_get(struct net *net, u16 zone, 393 nf_conntrack_find_get(struct net *net, u16 zone,
394 const struct nf_conntrack_tuple *tuple) 394 const struct nf_conntrack_tuple *tuple)
395 { 395 {
396 return __nf_conntrack_find_get(net, zone, tuple, 396 return __nf_conntrack_find_get(net, zone, tuple,
397 hash_conntrack_raw(tuple, zone)); 397 hash_conntrack_raw(tuple, zone));
398 } 398 }
399 EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 399 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
400 400
401 static void __nf_conntrack_hash_insert(struct nf_conn *ct, 401 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
402 unsigned int hash, 402 unsigned int hash,
403 unsigned int repl_hash) 403 unsigned int repl_hash)
404 { 404 {
405 struct net *net = nf_ct_net(ct); 405 struct net *net = nf_ct_net(ct);
406 406
407 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 407 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
408 &net->ct.hash[hash]); 408 &net->ct.hash[hash]);
409 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 409 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
410 &net->ct.hash[repl_hash]); 410 &net->ct.hash[repl_hash]);
411 } 411 }
412 412
413 int 413 int
414 nf_conntrack_hash_check_insert(struct nf_conn *ct) 414 nf_conntrack_hash_check_insert(struct nf_conn *ct)
415 { 415 {
416 struct net *net = nf_ct_net(ct); 416 struct net *net = nf_ct_net(ct);
417 unsigned int hash, repl_hash; 417 unsigned int hash, repl_hash;
418 struct nf_conntrack_tuple_hash *h; 418 struct nf_conntrack_tuple_hash *h;
419 struct hlist_nulls_node *n; 419 struct hlist_nulls_node *n;
420 u16 zone; 420 u16 zone;
421 421
422 zone = nf_ct_zone(ct); 422 zone = nf_ct_zone(ct);
423 hash = hash_conntrack(net, zone, 423 hash = hash_conntrack(net, zone,
424 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 424 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
425 repl_hash = hash_conntrack(net, zone, 425 repl_hash = hash_conntrack(net, zone,
426 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 426 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
427 427
428 spin_lock_bh(&nf_conntrack_lock); 428 spin_lock_bh(&nf_conntrack_lock);
429 429
430 /* See if there's one in the list already, including reverse */ 430 /* See if there's one in the list already, including reverse */
431 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 431 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
432 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 432 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
433 &h->tuple) && 433 &h->tuple) &&
434 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) 434 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
435 goto out; 435 goto out;
436 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) 436 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
437 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 437 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
438 &h->tuple) && 438 &h->tuple) &&
439 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) 439 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
440 goto out; 440 goto out;
441 441
442 add_timer(&ct->timeout); 442 add_timer(&ct->timeout);
443 nf_conntrack_get(&ct->ct_general); 443 nf_conntrack_get(&ct->ct_general);
444 __nf_conntrack_hash_insert(ct, hash, repl_hash); 444 __nf_conntrack_hash_insert(ct, hash, repl_hash);
445 NF_CT_STAT_INC(net, insert); 445 NF_CT_STAT_INC(net, insert);
446 spin_unlock_bh(&nf_conntrack_lock); 446 spin_unlock_bh(&nf_conntrack_lock);
447 447
448 return 0; 448 return 0;
449 449
450 out: 450 out:
451 NF_CT_STAT_INC(net, insert_failed); 451 NF_CT_STAT_INC(net, insert_failed);
452 spin_unlock_bh(&nf_conntrack_lock); 452 spin_unlock_bh(&nf_conntrack_lock);
453 return -EEXIST; 453 return -EEXIST;
454 } 454 }
455 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); 455 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
456 456
457 /* Confirm a connection given skb; places it in hash table */ 457 /* Confirm a connection given skb; places it in hash table */
458 int 458 int
459 __nf_conntrack_confirm(struct sk_buff *skb) 459 __nf_conntrack_confirm(struct sk_buff *skb)
460 { 460 {
461 unsigned int hash, repl_hash; 461 unsigned int hash, repl_hash;
462 struct nf_conntrack_tuple_hash *h; 462 struct nf_conntrack_tuple_hash *h;
463 struct nf_conn *ct; 463 struct nf_conn *ct;
464 struct nf_conn_help *help; 464 struct nf_conn_help *help;
465 struct nf_conn_tstamp *tstamp; 465 struct nf_conn_tstamp *tstamp;
466 struct hlist_nulls_node *n; 466 struct hlist_nulls_node *n;
467 enum ip_conntrack_info ctinfo; 467 enum ip_conntrack_info ctinfo;
468 struct net *net; 468 struct net *net;
469 u16 zone; 469 u16 zone;
470 470
471 ct = nf_ct_get(skb, &ctinfo); 471 ct = nf_ct_get(skb, &ctinfo);
472 net = nf_ct_net(ct); 472 net = nf_ct_net(ct);
473 473
474 /* ipt_REJECT uses nf_conntrack_attach to attach related 474 /* ipt_REJECT uses nf_conntrack_attach to attach related
475 ICMP/TCP RST packets in other direction. Actual packet 475 ICMP/TCP RST packets in other direction. Actual packet
476 which created connection will be IP_CT_NEW or for an 476 which created connection will be IP_CT_NEW or for an
477 expected connection, IP_CT_RELATED. */ 477 expected connection, IP_CT_RELATED. */
478 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 478 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
479 return NF_ACCEPT; 479 return NF_ACCEPT;
480 480
481 zone = nf_ct_zone(ct); 481 zone = nf_ct_zone(ct);
482 /* reuse the hash saved before */ 482 /* reuse the hash saved before */
483 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; 483 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
484 hash = hash_bucket(hash, net); 484 hash = hash_bucket(hash, net);
485 repl_hash = hash_conntrack(net, zone, 485 repl_hash = hash_conntrack(net, zone,
486 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 486 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
487 487
488 /* We're not in hash table, and we refuse to set up related 488 /* We're not in hash table, and we refuse to set up related
489 connections for unconfirmed conns. But packet copies and 489 connections for unconfirmed conns. But packet copies and
490 REJECT will give spurious warnings here. */ 490 REJECT will give spurious warnings here. */
491 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ 491 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
492 492
493 /* No external references means no one else could have 493 /* No external references means no one else could have
494 confirmed us. */ 494 confirmed us. */
495 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 495 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
496 pr_debug("Confirming conntrack %p\n", ct); 496 pr_debug("Confirming conntrack %p\n", ct);
497 497
498 spin_lock_bh(&nf_conntrack_lock); 498 spin_lock_bh(&nf_conntrack_lock);
499 499
500 /* We have to check the DYING flag inside the lock to prevent 500 /* We have to check the DYING flag inside the lock to prevent
501 a race against nf_ct_get_next_corpse() possibly called from 501 a race against nf_ct_get_next_corpse() possibly called from
502 user context, else we insert an already 'dead' hash, blocking 502 user context, else we insert an already 'dead' hash, blocking
503 further use of that particular connection -JM */ 503 further use of that particular connection -JM */
504 504
505 if (unlikely(nf_ct_is_dying(ct))) { 505 if (unlikely(nf_ct_is_dying(ct))) {
506 spin_unlock_bh(&nf_conntrack_lock); 506 spin_unlock_bh(&nf_conntrack_lock);
507 return NF_ACCEPT; 507 return NF_ACCEPT;
508 } 508 }
509 509
510 /* See if there's one in the list already, including reverse: 510 /* See if there's one in the list already, including reverse:
511 NAT could have grabbed it without realizing, since we're 511 NAT could have grabbed it without realizing, since we're
512 not in the hash. If there is, we lost race. */ 512 not in the hash. If there is, we lost race. */
513 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 513 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
514 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 514 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
515 &h->tuple) && 515 &h->tuple) &&
516 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) 516 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
517 goto out; 517 goto out;
518 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) 518 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
519 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 519 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
520 &h->tuple) && 520 &h->tuple) &&
521 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) 521 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
522 goto out; 522 goto out;
523 523
524 /* Remove from unconfirmed list */ 524 /* Remove from unconfirmed list */
525 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 525 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
526 526
527 /* Timer relative to confirmation time, not original 527 /* Timer relative to confirmation time, not original
528 setting time, otherwise we'd get timer wrap in 528 setting time, otherwise we'd get timer wrap in
529 weird delay cases. */ 529 weird delay cases. */
530 ct->timeout.expires += jiffies; 530 ct->timeout.expires += jiffies;
531 add_timer(&ct->timeout); 531 add_timer(&ct->timeout);
532 atomic_inc(&ct->ct_general.use); 532 atomic_inc(&ct->ct_general.use);
533 ct->status |= IPS_CONFIRMED; 533 ct->status |= IPS_CONFIRMED;
534 534
535 /* set conntrack timestamp, if enabled. */ 535 /* set conntrack timestamp, if enabled. */
536 tstamp = nf_conn_tstamp_find(ct); 536 tstamp = nf_conn_tstamp_find(ct);
537 if (tstamp) { 537 if (tstamp) {
538 if (skb->tstamp.tv64 == 0) 538 if (skb->tstamp.tv64 == 0)
539 __net_timestamp(skb); 539 __net_timestamp(skb);
540 540
541 tstamp->start = ktime_to_ns(skb->tstamp); 541 tstamp->start = ktime_to_ns(skb->tstamp);
542 } 542 }
543 /* Since the lookup is lockless, hash insertion must be done after 543 /* Since the lookup is lockless, hash insertion must be done after
544 * starting the timer and setting the CONFIRMED bit. The RCU barriers 544 * starting the timer and setting the CONFIRMED bit. The RCU barriers
545 * guarantee that no other CPU can find the conntrack before the above 545 * guarantee that no other CPU can find the conntrack before the above
546 * stores are visible. 546 * stores are visible.
547 */ 547 */
548 __nf_conntrack_hash_insert(ct, hash, repl_hash); 548 __nf_conntrack_hash_insert(ct, hash, repl_hash);
549 NF_CT_STAT_INC(net, insert); 549 NF_CT_STAT_INC(net, insert);
550 spin_unlock_bh(&nf_conntrack_lock); 550 spin_unlock_bh(&nf_conntrack_lock);
551 551
552 help = nfct_help(ct); 552 help = nfct_help(ct);
553 if (help && help->helper) 553 if (help && help->helper)
554 nf_conntrack_event_cache(IPCT_HELPER, ct); 554 nf_conntrack_event_cache(IPCT_HELPER, ct);
555 555
556 nf_conntrack_event_cache(master_ct(ct) ? 556 nf_conntrack_event_cache(master_ct(ct) ?
557 IPCT_RELATED : IPCT_NEW, ct); 557 IPCT_RELATED : IPCT_NEW, ct);
558 return NF_ACCEPT; 558 return NF_ACCEPT;
559 559
560 out: 560 out:
561 NF_CT_STAT_INC(net, insert_failed); 561 NF_CT_STAT_INC(net, insert_failed);
562 spin_unlock_bh(&nf_conntrack_lock); 562 spin_unlock_bh(&nf_conntrack_lock);
563 return NF_DROP; 563 return NF_DROP;
564 } 564 }
565 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); 565 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
566 566
567 /* Returns true if a connection correspondings to the tuple (required 567 /* Returns true if a connection correspondings to the tuple (required
568 for NAT). */ 568 for NAT). */
569 int 569 int
570 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, 570 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
571 const struct nf_conn *ignored_conntrack) 571 const struct nf_conn *ignored_conntrack)
572 { 572 {
573 struct net *net = nf_ct_net(ignored_conntrack); 573 struct net *net = nf_ct_net(ignored_conntrack);
574 struct nf_conntrack_tuple_hash *h; 574 struct nf_conntrack_tuple_hash *h;
575 struct hlist_nulls_node *n; 575 struct hlist_nulls_node *n;
576 struct nf_conn *ct; 576 struct nf_conn *ct;
577 u16 zone = nf_ct_zone(ignored_conntrack); 577 u16 zone = nf_ct_zone(ignored_conntrack);
578 unsigned int hash = hash_conntrack(net, zone, tuple); 578 unsigned int hash = hash_conntrack(net, zone, tuple);
579 579
580 /* Disable BHs the entire time since we need to disable them at 580 /* Disable BHs the entire time since we need to disable them at
581 * least once for the stats anyway. 581 * least once for the stats anyway.
582 */ 582 */
583 rcu_read_lock_bh(); 583 rcu_read_lock_bh();
584 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { 584 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
585 ct = nf_ct_tuplehash_to_ctrack(h); 585 ct = nf_ct_tuplehash_to_ctrack(h);
586 if (ct != ignored_conntrack && 586 if (ct != ignored_conntrack &&
587 nf_ct_tuple_equal(tuple, &h->tuple) && 587 nf_ct_tuple_equal(tuple, &h->tuple) &&
588 nf_ct_zone(ct) == zone) { 588 nf_ct_zone(ct) == zone) {
589 NF_CT_STAT_INC(net, found); 589 NF_CT_STAT_INC(net, found);
590 rcu_read_unlock_bh(); 590 rcu_read_unlock_bh();
591 return 1; 591 return 1;
592 } 592 }
593 NF_CT_STAT_INC(net, searched); 593 NF_CT_STAT_INC(net, searched);
594 } 594 }
595 rcu_read_unlock_bh(); 595 rcu_read_unlock_bh();
596 596
597 return 0; 597 return 0;
598 } 598 }
599 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); 599 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
600 600
601 #define NF_CT_EVICTION_RANGE 8 601 #define NF_CT_EVICTION_RANGE 8
602 602
603 /* There's a small race here where we may free a just-assured 603 /* There's a small race here where we may free a just-assured
604 connection. Too bad: we're in trouble anyway. */ 604 connection. Too bad: we're in trouble anyway. */
605 static noinline int early_drop(struct net *net, unsigned int hash) 605 static noinline int early_drop(struct net *net, unsigned int hash)
606 { 606 {
607 /* Use oldest entry, which is roughly LRU */ 607 /* Use oldest entry, which is roughly LRU */
608 struct nf_conntrack_tuple_hash *h; 608 struct nf_conntrack_tuple_hash *h;
609 struct nf_conn *ct = NULL, *tmp; 609 struct nf_conn *ct = NULL, *tmp;
610 struct hlist_nulls_node *n; 610 struct hlist_nulls_node *n;
611 unsigned int i, cnt = 0; 611 unsigned int i, cnt = 0;
612 int dropped = 0; 612 int dropped = 0;
613 613
614 rcu_read_lock(); 614 rcu_read_lock();
615 for (i = 0; i < net->ct.htable_size; i++) { 615 for (i = 0; i < net->ct.htable_size; i++) {
616 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], 616 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
617 hnnode) { 617 hnnode) {
618 tmp = nf_ct_tuplehash_to_ctrack(h); 618 tmp = nf_ct_tuplehash_to_ctrack(h);
619 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) 619 if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
620 ct = tmp; 620 ct = tmp;
621 cnt++; 621 cnt++;
622 } 622 }
623 623
624 if (ct != NULL) { 624 if (ct != NULL) {
625 if (likely(!nf_ct_is_dying(ct) && 625 if (likely(!nf_ct_is_dying(ct) &&
626 atomic_inc_not_zero(&ct->ct_general.use))) 626 atomic_inc_not_zero(&ct->ct_general.use)))
627 break; 627 break;
628 else 628 else
629 ct = NULL; 629 ct = NULL;
630 } 630 }
631 631
632 if (cnt >= NF_CT_EVICTION_RANGE) 632 if (cnt >= NF_CT_EVICTION_RANGE)
633 break; 633 break;
634 634
635 hash = (hash + 1) % net->ct.htable_size; 635 hash = (hash + 1) % net->ct.htable_size;
636 } 636 }
637 rcu_read_unlock(); 637 rcu_read_unlock();
638 638
639 if (!ct) 639 if (!ct)
640 return dropped; 640 return dropped;
641 641
642 if (del_timer(&ct->timeout)) { 642 if (del_timer(&ct->timeout)) {
643 death_by_timeout((unsigned long)ct); 643 death_by_timeout((unsigned long)ct);
644 /* Check if we indeed killed this entry. Reliable event 644 /* Check if we indeed killed this entry. Reliable event
645 delivery may have inserted it into the dying list. */ 645 delivery may have inserted it into the dying list. */
646 if (test_bit(IPS_DYING_BIT, &ct->status)) { 646 if (test_bit(IPS_DYING_BIT, &ct->status)) {
647 dropped = 1; 647 dropped = 1;
648 NF_CT_STAT_INC_ATOMIC(net, early_drop); 648 NF_CT_STAT_INC_ATOMIC(net, early_drop);
649 } 649 }
650 } 650 }
651 nf_ct_put(ct); 651 nf_ct_put(ct);
652 return dropped; 652 return dropped;
653 } 653 }
654 654
655 void init_nf_conntrack_hash_rnd(void) 655 void init_nf_conntrack_hash_rnd(void)
656 { 656 {
657 unsigned int rand; 657 unsigned int rand;
658 658
659 /* 659 /*
660 * Why not initialize nf_conntrack_rnd in a "init()" function ? 660 * Why not initialize nf_conntrack_rnd in a "init()" function ?
661 * Because there isn't enough entropy when system initializing, 661 * Because there isn't enough entropy when system initializing,
662 * and we initialize it as late as possible. 662 * and we initialize it as late as possible.
663 */ 663 */
664 do { 664 do {
665 get_random_bytes(&rand, sizeof(rand)); 665 get_random_bytes(&rand, sizeof(rand));
666 } while (!rand); 666 } while (!rand);
667 cmpxchg(&nf_conntrack_hash_rnd, 0, rand); 667 cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
668 } 668 }
669 669
670 static struct nf_conn * 670 static struct nf_conn *
671 __nf_conntrack_alloc(struct net *net, u16 zone, 671 __nf_conntrack_alloc(struct net *net, u16 zone,
672 const struct nf_conntrack_tuple *orig, 672 const struct nf_conntrack_tuple *orig,
673 const struct nf_conntrack_tuple *repl, 673 const struct nf_conntrack_tuple *repl,
674 gfp_t gfp, u32 hash) 674 gfp_t gfp, u32 hash)
675 { 675 {
676 struct nf_conn *ct; 676 struct nf_conn *ct;
677 677
678 if (unlikely(!nf_conntrack_hash_rnd)) { 678 if (unlikely(!nf_conntrack_hash_rnd)) {
679 init_nf_conntrack_hash_rnd(); 679 init_nf_conntrack_hash_rnd();
680 /* recompute the hash as nf_conntrack_hash_rnd is initialized */ 680 /* recompute the hash as nf_conntrack_hash_rnd is initialized */
681 hash = hash_conntrack_raw(orig, zone); 681 hash = hash_conntrack_raw(orig, zone);
682 } 682 }
683 683
684 /* We don't want any race condition at early drop stage */ 684 /* We don't want any race condition at early drop stage */
685 atomic_inc(&net->ct.count); 685 atomic_inc(&net->ct.count);
686 686
687 if (nf_conntrack_max && 687 if (nf_conntrack_max &&
688 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 688 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
689 if (!early_drop(net, hash_bucket(hash, net))) { 689 if (!early_drop(net, hash_bucket(hash, net))) {
690 atomic_dec(&net->ct.count); 690 atomic_dec(&net->ct.count);
691 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); 691 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
692 return ERR_PTR(-ENOMEM); 692 return ERR_PTR(-ENOMEM);
693 } 693 }
694 } 694 }
695 695
696 /* 696 /*
697 * Do not use kmem_cache_zalloc(), as this cache uses 697 * Do not use kmem_cache_zalloc(), as this cache uses
698 * SLAB_DESTROY_BY_RCU. 698 * SLAB_DESTROY_BY_RCU.
699 */ 699 */
700 ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); 700 ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
701 if (ct == NULL) { 701 if (ct == NULL) {
702 atomic_dec(&net->ct.count); 702 atomic_dec(&net->ct.count);
703 return ERR_PTR(-ENOMEM); 703 return ERR_PTR(-ENOMEM);
704 } 704 }
705 /* 705 /*
706 * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next 706 * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next
707 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 707 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
708 */ 708 */
709 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, 709 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
710 offsetof(struct nf_conn, proto) - 710 offsetof(struct nf_conn, proto) -
711 offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 711 offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
712 spin_lock_init(&ct->lock); 712 spin_lock_init(&ct->lock);
713 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 713 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
714 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 714 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
715 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 715 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
716 /* save hash for reusing when confirming */ 716 /* save hash for reusing when confirming */
717 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; 717 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
718 /* Don't set timer yet: wait for confirmation */ 718 /* Don't set timer yet: wait for confirmation */
719 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); 719 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
720 write_pnet(&ct->ct_net, net); 720 write_pnet(&ct->ct_net, net);
721 #ifdef CONFIG_NF_CONNTRACK_ZONES 721 #ifdef CONFIG_NF_CONNTRACK_ZONES
722 if (zone) { 722 if (zone) {
723 struct nf_conntrack_zone *nf_ct_zone; 723 struct nf_conntrack_zone *nf_ct_zone;
724 724
725 nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); 725 nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
726 if (!nf_ct_zone) 726 if (!nf_ct_zone)
727 goto out_free; 727 goto out_free;
728 nf_ct_zone->id = zone; 728 nf_ct_zone->id = zone;
729 } 729 }
730 #endif 730 #endif
731 /* 731 /*
732 * changes to lookup keys must be done before setting refcnt to 1 732 * changes to lookup keys must be done before setting refcnt to 1
733 */ 733 */
734 smp_wmb(); 734 smp_wmb();
735 atomic_set(&ct->ct_general.use, 1); 735 atomic_set(&ct->ct_general.use, 1);
736 return ct; 736 return ct;
737 737
738 #ifdef CONFIG_NF_CONNTRACK_ZONES 738 #ifdef CONFIG_NF_CONNTRACK_ZONES
739 out_free: 739 out_free:
740 atomic_dec(&net->ct.count); 740 atomic_dec(&net->ct.count);
741 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 741 kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
742 return ERR_PTR(-ENOMEM); 742 return ERR_PTR(-ENOMEM);
743 #endif 743 #endif
744 } 744 }
745 745
746 struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, 746 struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
747 const struct nf_conntrack_tuple *orig, 747 const struct nf_conntrack_tuple *orig,
748 const struct nf_conntrack_tuple *repl, 748 const struct nf_conntrack_tuple *repl,
749 gfp_t gfp) 749 gfp_t gfp)
750 { 750 {
751 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0); 751 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
752 } 752 }
753 EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 753 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
754 754
755 void nf_conntrack_free(struct nf_conn *ct) 755 void nf_conntrack_free(struct nf_conn *ct)
756 { 756 {
757 struct net *net = nf_ct_net(ct); 757 struct net *net = nf_ct_net(ct);
758 758
759 nf_ct_ext_destroy(ct); 759 nf_ct_ext_destroy(ct);
760 atomic_dec(&net->ct.count); 760 atomic_dec(&net->ct.count);
761 nf_ct_ext_free(ct); 761 nf_ct_ext_free(ct);
762 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 762 kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
763 } 763 }
764 EXPORT_SYMBOL_GPL(nf_conntrack_free); 764 EXPORT_SYMBOL_GPL(nf_conntrack_free);
765 765
766 /* Allocate a new conntrack: we return -ENOMEM if classification 766 /* Allocate a new conntrack: we return -ENOMEM if classification
767 failed due to stress. Otherwise it really is unclassifiable. */ 767 failed due to stress. Otherwise it really is unclassifiable. */
768 static struct nf_conntrack_tuple_hash * 768 static struct nf_conntrack_tuple_hash *
769 init_conntrack(struct net *net, struct nf_conn *tmpl, 769 init_conntrack(struct net *net, struct nf_conn *tmpl,
770 const struct nf_conntrack_tuple *tuple, 770 const struct nf_conntrack_tuple *tuple,
771 struct nf_conntrack_l3proto *l3proto, 771 struct nf_conntrack_l3proto *l3proto,
772 struct nf_conntrack_l4proto *l4proto, 772 struct nf_conntrack_l4proto *l4proto,
773 struct sk_buff *skb, 773 struct sk_buff *skb,
774 unsigned int dataoff, u32 hash) 774 unsigned int dataoff, u32 hash)
775 { 775 {
776 struct nf_conn *ct; 776 struct nf_conn *ct;
777 struct nf_conn_help *help; 777 struct nf_conn_help *help;
778 struct nf_conntrack_tuple repl_tuple; 778 struct nf_conntrack_tuple repl_tuple;
779 struct nf_conntrack_ecache *ecache; 779 struct nf_conntrack_ecache *ecache;
780 struct nf_conntrack_expect *exp; 780 struct nf_conntrack_expect *exp;
781 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; 781 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
782 struct nf_conn_timeout *timeout_ext; 782 struct nf_conn_timeout *timeout_ext;
783 unsigned int *timeouts; 783 unsigned int *timeouts;
784 784
785 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { 785 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
786 pr_debug("Can't invert tuple.\n"); 786 pr_debug("Can't invert tuple.\n");
787 return NULL; 787 return NULL;
788 } 788 }
789 789
790 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, 790 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
791 hash); 791 hash);
792 if (IS_ERR(ct)) 792 if (IS_ERR(ct))
793 return (struct nf_conntrack_tuple_hash *)ct; 793 return (struct nf_conntrack_tuple_hash *)ct;
794 794
795 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 795 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
796 if (timeout_ext) 796 if (timeout_ext)
797 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); 797 timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
798 else 798 else
799 timeouts = l4proto->get_timeouts(net); 799 timeouts = l4proto->get_timeouts(net);
800 800
801 if (!l4proto->new(ct, skb, dataoff, timeouts)) { 801 if (!l4proto->new(ct, skb, dataoff, timeouts)) {
802 nf_conntrack_free(ct); 802 nf_conntrack_free(ct);
803 pr_debug("init conntrack: can't track with proto module\n"); 803 pr_debug("init conntrack: can't track with proto module\n");
804 return NULL; 804 return NULL;
805 } 805 }
806 806
807 if (timeout_ext) 807 if (timeout_ext)
808 nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC); 808 nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC);
809 809
810 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 810 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
811 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 811 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
812 812
813 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 813 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
814 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 814 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
815 ecache ? ecache->expmask : 0, 815 ecache ? ecache->expmask : 0,
816 GFP_ATOMIC); 816 GFP_ATOMIC);
817 817
818 spin_lock_bh(&nf_conntrack_lock); 818 spin_lock_bh(&nf_conntrack_lock);
819 exp = nf_ct_find_expectation(net, zone, tuple); 819 exp = nf_ct_find_expectation(net, zone, tuple);
820 if (exp) { 820 if (exp) {
821 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", 821 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
822 ct, exp); 822 ct, exp);
823 /* Welcome, Mr. Bond. We've been expecting you... */ 823 /* Welcome, Mr. Bond. We've been expecting you... */
824 __set_bit(IPS_EXPECTED_BIT, &ct->status); 824 __set_bit(IPS_EXPECTED_BIT, &ct->status);
825 ct->master = exp->master; 825 ct->master = exp->master;
826 if (exp->helper) { 826 if (exp->helper) {
827 help = nf_ct_helper_ext_add(ct, exp->helper, 827 help = nf_ct_helper_ext_add(ct, exp->helper,
828 GFP_ATOMIC); 828 GFP_ATOMIC);
829 if (help) 829 if (help)
830 rcu_assign_pointer(help->helper, exp->helper); 830 rcu_assign_pointer(help->helper, exp->helper);
831 } 831 }
832 832
833 #ifdef CONFIG_NF_CONNTRACK_MARK 833 #ifdef CONFIG_NF_CONNTRACK_MARK
834 ct->mark = exp->master->mark; 834 ct->mark = exp->master->mark;
835 #endif 835 #endif
836 #ifdef CONFIG_NF_CONNTRACK_SECMARK 836 #ifdef CONFIG_NF_CONNTRACK_SECMARK
837 ct->secmark = exp->master->secmark; 837 ct->secmark = exp->master->secmark;
838 #endif 838 #endif
839 nf_conntrack_get(&ct->master->ct_general); 839 nf_conntrack_get(&ct->master->ct_general);
840 NF_CT_STAT_INC(net, expect_new); 840 NF_CT_STAT_INC(net, expect_new);
841 } else { 841 } else {
842 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); 842 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
843 NF_CT_STAT_INC(net, new); 843 NF_CT_STAT_INC(net, new);
844 } 844 }
845 845
846 /* Overload tuple linked list to put us in unconfirmed list. */ 846 /* Overload tuple linked list to put us in unconfirmed list. */
847 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 847 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
848 &net->ct.unconfirmed); 848 &net->ct.unconfirmed);
849 849
850 spin_unlock_bh(&nf_conntrack_lock); 850 spin_unlock_bh(&nf_conntrack_lock);
851 851
852 if (exp) { 852 if (exp) {
853 if (exp->expectfn) 853 if (exp->expectfn)
854 exp->expectfn(ct, exp); 854 exp->expectfn(ct, exp);
855 nf_ct_expect_put(exp); 855 nf_ct_expect_put(exp);
856 } 856 }
857 857
858 return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; 858 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
859 } 859 }
860 860
861 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ 861 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
862 static inline struct nf_conn * 862 static inline struct nf_conn *
863 resolve_normal_ct(struct net *net, struct nf_conn *tmpl, 863 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
864 struct sk_buff *skb, 864 struct sk_buff *skb,
865 unsigned int dataoff, 865 unsigned int dataoff,
866 u_int16_t l3num, 866 u_int16_t l3num,
867 u_int8_t protonum, 867 u_int8_t protonum,
868 struct nf_conntrack_l3proto *l3proto, 868 struct nf_conntrack_l3proto *l3proto,
869 struct nf_conntrack_l4proto *l4proto, 869 struct nf_conntrack_l4proto *l4proto,
870 int *set_reply, 870 int *set_reply,
871 enum ip_conntrack_info *ctinfo) 871 enum ip_conntrack_info *ctinfo)
872 { 872 {
873 struct nf_conntrack_tuple tuple; 873 struct nf_conntrack_tuple tuple;
874 struct nf_conntrack_tuple_hash *h; 874 struct nf_conntrack_tuple_hash *h;
875 struct nf_conn *ct; 875 struct nf_conn *ct;
876 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; 876 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
877 u32 hash; 877 u32 hash;
878 878
879 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 879 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
880 dataoff, l3num, protonum, &tuple, l3proto, 880 dataoff, l3num, protonum, &tuple, l3proto,
881 l4proto)) { 881 l4proto)) {
882 pr_debug("resolve_normal_ct: Can't get tuple\n"); 882 pr_debug("resolve_normal_ct: Can't get tuple\n");
883 return NULL; 883 return NULL;
884 } 884 }
885 885
886 /* look for tuple match */ 886 /* look for tuple match */
887 hash = hash_conntrack_raw(&tuple, zone); 887 hash = hash_conntrack_raw(&tuple, zone);
888 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 888 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
889 if (!h) { 889 if (!h) {
890 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 890 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
891 skb, dataoff, hash); 891 skb, dataoff, hash);
892 if (!h) 892 if (!h)
893 return NULL; 893 return NULL;
894 if (IS_ERR(h)) 894 if (IS_ERR(h))
895 return (void *)h; 895 return (void *)h;
896 } 896 }
897 ct = nf_ct_tuplehash_to_ctrack(h); 897 ct = nf_ct_tuplehash_to_ctrack(h);
898 898
899 /* It exists; we have (non-exclusive) reference. */ 899 /* It exists; we have (non-exclusive) reference. */
900 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 900 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
901 *ctinfo = IP_CT_ESTABLISHED_REPLY; 901 *ctinfo = IP_CT_ESTABLISHED_REPLY;
902 /* Please set reply bit if this packet OK */ 902 /* Please set reply bit if this packet OK */
903 *set_reply = 1; 903 *set_reply = 1;
904 } else { 904 } else {
905 /* Once we've had two way comms, always ESTABLISHED. */ 905 /* Once we've had two way comms, always ESTABLISHED. */
906 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 906 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
907 pr_debug("nf_conntrack_in: normal packet for %p\n", ct); 907 pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
908 *ctinfo = IP_CT_ESTABLISHED; 908 *ctinfo = IP_CT_ESTABLISHED;
909 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { 909 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
910 pr_debug("nf_conntrack_in: related packet for %p\n", 910 pr_debug("nf_conntrack_in: related packet for %p\n",
911 ct); 911 ct);
912 *ctinfo = IP_CT_RELATED; 912 *ctinfo = IP_CT_RELATED;
913 } else { 913 } else {
914 pr_debug("nf_conntrack_in: new packet for %p\n", ct); 914 pr_debug("nf_conntrack_in: new packet for %p\n", ct);
915 *ctinfo = IP_CT_NEW; 915 *ctinfo = IP_CT_NEW;
916 } 916 }
917 *set_reply = 0; 917 *set_reply = 0;
918 } 918 }
919 skb->nfct = &ct->ct_general; 919 skb->nfct = &ct->ct_general;
920 skb->nfctinfo = *ctinfo; 920 skb->nfctinfo = *ctinfo;
921 return ct; 921 return ct;
922 } 922 }
923 923
924 unsigned int 924 unsigned int
925 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 925 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
926 struct sk_buff *skb) 926 struct sk_buff *skb)
927 { 927 {
928 struct nf_conn *ct, *tmpl = NULL; 928 struct nf_conn *ct, *tmpl = NULL;
929 enum ip_conntrack_info ctinfo; 929 enum ip_conntrack_info ctinfo;
930 struct nf_conntrack_l3proto *l3proto; 930 struct nf_conntrack_l3proto *l3proto;
931 struct nf_conntrack_l4proto *l4proto; 931 struct nf_conntrack_l4proto *l4proto;
932 unsigned int *timeouts; 932 unsigned int *timeouts;
933 unsigned int dataoff; 933 unsigned int dataoff;
934 u_int8_t protonum; 934 u_int8_t protonum;
935 int set_reply = 0; 935 int set_reply = 0;
936 int ret; 936 int ret;
937 937
938 if (skb->nfct) { 938 if (skb->nfct) {
939 /* Previously seen (loopback or untracked)? Ignore. */ 939 /* Previously seen (loopback or untracked)? Ignore. */
940 tmpl = (struct nf_conn *)skb->nfct; 940 tmpl = (struct nf_conn *)skb->nfct;
941 if (!nf_ct_is_template(tmpl)) { 941 if (!nf_ct_is_template(tmpl)) {
942 NF_CT_STAT_INC_ATOMIC(net, ignore); 942 NF_CT_STAT_INC_ATOMIC(net, ignore);
943 return NF_ACCEPT; 943 return NF_ACCEPT;
944 } 944 }
945 skb->nfct = NULL; 945 skb->nfct = NULL;
946 } 946 }
947 947
948 /* rcu_read_lock()ed by nf_hook_slow */ 948 /* rcu_read_lock()ed by nf_hook_slow */
949 l3proto = __nf_ct_l3proto_find(pf); 949 l3proto = __nf_ct_l3proto_find(pf);
950 ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 950 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
951 &dataoff, &protonum); 951 &dataoff, &protonum);
952 if (ret <= 0) { 952 if (ret <= 0) {
953 pr_debug("not prepared to track yet or error occurred\n"); 953 pr_debug("not prepared to track yet or error occurred\n");
954 NF_CT_STAT_INC_ATOMIC(net, error); 954 NF_CT_STAT_INC_ATOMIC(net, error);
955 NF_CT_STAT_INC_ATOMIC(net, invalid); 955 NF_CT_STAT_INC_ATOMIC(net, invalid);
956 ret = -ret; 956 ret = -ret;
957 goto out; 957 goto out;
958 } 958 }
959 959
960 l4proto = __nf_ct_l4proto_find(pf, protonum); 960 l4proto = __nf_ct_l4proto_find(pf, protonum);
961 961
962 /* It may be a special packet, error, unclean... 962 /* It may be a special packet, error, unclean...
963 * the inverse of the return code tells the netfilter 963 * the inverse of the return code tells the netfilter
964 * core what to do with the packet. */ 964 * core what to do with the packet. */
965 if (l4proto->error != NULL) { 965 if (l4proto->error != NULL) {
966 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, 966 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
967 pf, hooknum); 967 pf, hooknum);
968 if (ret <= 0) { 968 if (ret <= 0) {
969 NF_CT_STAT_INC_ATOMIC(net, error); 969 NF_CT_STAT_INC_ATOMIC(net, error);
970 NF_CT_STAT_INC_ATOMIC(net, invalid); 970 NF_CT_STAT_INC_ATOMIC(net, invalid);
971 ret = -ret; 971 ret = -ret;
972 goto out; 972 goto out;
973 } 973 }
974 /* ICMP[v6] protocol trackers may assign one conntrack. */ 974 /* ICMP[v6] protocol trackers may assign one conntrack. */
975 if (skb->nfct) 975 if (skb->nfct)
976 goto out; 976 goto out;
977 } 977 }
978 978
979 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 979 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
980 l3proto, l4proto, &set_reply, &ctinfo); 980 l3proto, l4proto, &set_reply, &ctinfo);
981 if (!ct) { 981 if (!ct) {
982 /* Not valid part of a connection */ 982 /* Not valid part of a connection */
983 NF_CT_STAT_INC_ATOMIC(net, invalid); 983 NF_CT_STAT_INC_ATOMIC(net, invalid);
984 ret = NF_ACCEPT; 984 ret = NF_ACCEPT;
985 goto out; 985 goto out;
986 } 986 }
987 987
988 if (IS_ERR(ct)) { 988 if (IS_ERR(ct)) {
989 /* Too stressed to deal. */ 989 /* Too stressed to deal. */
990 NF_CT_STAT_INC_ATOMIC(net, drop); 990 NF_CT_STAT_INC_ATOMIC(net, drop);
991 ret = NF_DROP; 991 ret = NF_DROP;
992 goto out; 992 goto out;
993 } 993 }
994 994
995 NF_CT_ASSERT(skb->nfct); 995 NF_CT_ASSERT(skb->nfct);
996 996
997 /* Decide what timeout policy we want to apply to this flow. */ 997 /* Decide what timeout policy we want to apply to this flow. */
998 timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 998 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
999 999
1000 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); 1000 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
1001 if (ret <= 0) { 1001 if (ret <= 0) {
1002 /* Invalid: inverse of the return code tells 1002 /* Invalid: inverse of the return code tells
1003 * the netfilter core what to do */ 1003 * the netfilter core what to do */
1004 pr_debug("nf_conntrack_in: Can't track with proto module\n"); 1004 pr_debug("nf_conntrack_in: Can't track with proto module\n");
1005 nf_conntrack_put(skb->nfct); 1005 nf_conntrack_put(skb->nfct);
1006 skb->nfct = NULL; 1006 skb->nfct = NULL;
1007 NF_CT_STAT_INC_ATOMIC(net, invalid); 1007 NF_CT_STAT_INC_ATOMIC(net, invalid);
1008 if (ret == -NF_DROP) 1008 if (ret == -NF_DROP)
1009 NF_CT_STAT_INC_ATOMIC(net, drop); 1009 NF_CT_STAT_INC_ATOMIC(net, drop);
1010 ret = -ret; 1010 ret = -ret;
1011 goto out; 1011 goto out;
1012 } 1012 }
1013 1013
1014 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 1014 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1015 nf_conntrack_event_cache(IPCT_REPLY, ct); 1015 nf_conntrack_event_cache(IPCT_REPLY, ct);
1016 out: 1016 out:
1017 if (tmpl) { 1017 if (tmpl) {
1018 /* Special case: we have to repeat this hook, assign the 1018 /* Special case: we have to repeat this hook, assign the
1019 * template again to this packet. We assume that this packet 1019 * template again to this packet. We assume that this packet
1020 * has no conntrack assigned. This is used by nf_ct_tcp. */ 1020 * has no conntrack assigned. This is used by nf_ct_tcp. */
1021 if (ret == NF_REPEAT) 1021 if (ret == NF_REPEAT)
1022 skb->nfct = (struct nf_conntrack *)tmpl; 1022 skb->nfct = (struct nf_conntrack *)tmpl;
1023 else 1023 else
1024 nf_ct_put(tmpl); 1024 nf_ct_put(tmpl);
1025 } 1025 }
1026 1026
1027 return ret; 1027 return ret;
1028 } 1028 }
1029 EXPORT_SYMBOL_GPL(nf_conntrack_in); 1029 EXPORT_SYMBOL_GPL(nf_conntrack_in);
1030 1030
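For readers tracing how the ct/ctinfo pair set up by resolve_normal_ct() is consumed later in the stack, the sketch below shows a netfilter hook that reads the conntrack state nf_conntrack_in() attached via skb->nfct/skb->nfctinfo. This is illustrative only and not part of the change; the hook name is hypothetical, and the only APIs assumed are nf_ct_get() and the ctinfo values used above.

    #include <linux/kernel.h>
    #include <linux/skbuff.h>
    #include <linux/netfilter.h>
    #include <net/netfilter/nf_conntrack.h>

    /* Illustrative hook: classify packets by the state attached by conntrack. */
    static unsigned int example_state_hook(unsigned int hooknum,
                                           struct sk_buff *skb,
                                           const struct net_device *in,
                                           const struct net_device *out,
                                           int (*okfn)(struct sk_buff *))
    {
            enum ip_conntrack_info ctinfo;
            struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

            if (!ct)                /* no conntrack attached to this skb */
                    return NF_ACCEPT;

            switch (ctinfo) {
            case IP_CT_NEW:
                    pr_debug("first packet of a new flow\n");
                    break;
            case IP_CT_ESTABLISHED:
            case IP_CT_ESTABLISHED_REPLY:
                    pr_debug("packet of an established flow\n");
                    break;
            case IP_CT_RELATED:
            case IP_CT_RELATED_REPLY:
                    pr_debug("packet related to an existing flow\n");
                    break;
            default:
                    break;
            }
            return NF_ACCEPT;
    }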
1031 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, 1031 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1032 const struct nf_conntrack_tuple *orig) 1032 const struct nf_conntrack_tuple *orig)
1033 { 1033 {
1034 bool ret; 1034 bool ret;
1035 1035
1036 rcu_read_lock(); 1036 rcu_read_lock();
1037 ret = nf_ct_invert_tuple(inverse, orig, 1037 ret = nf_ct_invert_tuple(inverse, orig,
1038 __nf_ct_l3proto_find(orig->src.l3num), 1038 __nf_ct_l3proto_find(orig->src.l3num),
1039 __nf_ct_l4proto_find(orig->src.l3num, 1039 __nf_ct_l4proto_find(orig->src.l3num,
1040 orig->dst.protonum)); 1040 orig->dst.protonum));
1041 rcu_read_unlock(); 1041 rcu_read_unlock();
1042 return ret; 1042 return ret;
1043 } 1043 }
1044 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); 1044 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
1045 1045
1046 /* Alter reply tuple (maybe alter helper). This is for NAT, and is 1046 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1047 implicitly racy: see __nf_conntrack_confirm */ 1047 implicitly racy: see __nf_conntrack_confirm */
1048 void nf_conntrack_alter_reply(struct nf_conn *ct, 1048 void nf_conntrack_alter_reply(struct nf_conn *ct,
1049 const struct nf_conntrack_tuple *newreply) 1049 const struct nf_conntrack_tuple *newreply)
1050 { 1050 {
1051 struct nf_conn_help *help = nfct_help(ct); 1051 struct nf_conn_help *help = nfct_help(ct);
1052 1052
1053 /* Should be unconfirmed, so not in hash table yet */ 1053 /* Should be unconfirmed, so not in hash table yet */
1054 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 1054 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
1055 1055
1056 pr_debug("Altering reply tuple of %p to ", ct); 1056 pr_debug("Altering reply tuple of %p to ", ct);
1057 nf_ct_dump_tuple(newreply); 1057 nf_ct_dump_tuple(newreply);
1058 1058
1059 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 1059 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1060 if (ct->master || (help && !hlist_empty(&help->expectations))) 1060 if (ct->master || (help && !hlist_empty(&help->expectations)))
1061 return; 1061 return;
1062 1062
1063 rcu_read_lock(); 1063 rcu_read_lock();
1064 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); 1064 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
1065 rcu_read_unlock(); 1065 rcu_read_unlock();
1066 } 1066 }
1067 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); 1067 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
1068 1068
1069 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ 1069 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1070 void __nf_ct_refresh_acct(struct nf_conn *ct, 1070 void __nf_ct_refresh_acct(struct nf_conn *ct,
1071 enum ip_conntrack_info ctinfo, 1071 enum ip_conntrack_info ctinfo,
1072 const struct sk_buff *skb, 1072 const struct sk_buff *skb,
1073 unsigned long extra_jiffies, 1073 unsigned long extra_jiffies,
1074 int do_acct) 1074 int do_acct)
1075 { 1075 {
1076 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); 1076 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
1077 NF_CT_ASSERT(skb); 1077 NF_CT_ASSERT(skb);
1078 1078
1079 /* Only update if this is not a fixed timeout */ 1079 /* Only update if this is not a fixed timeout */
1080 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) 1080 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1081 goto acct; 1081 goto acct;
1082 1082
1083 /* If not in hash table, timer will not be active yet */ 1083 /* If not in hash table, timer will not be active yet */
1084 if (!nf_ct_is_confirmed(ct)) { 1084 if (!nf_ct_is_confirmed(ct)) {
1085 ct->timeout.expires = extra_jiffies; 1085 ct->timeout.expires = extra_jiffies;
1086 } else { 1086 } else {
1087 unsigned long newtime = jiffies + extra_jiffies; 1087 unsigned long newtime = jiffies + extra_jiffies;
1088 1088
1089 /* Only update the timeout if the new timeout is at least 1089 /* Only update the timeout if the new timeout is at least
1090 HZ jiffies from the old timeout. Need del_timer for race 1090 HZ jiffies from the old timeout. Need del_timer for race
1091 avoidance (may already be dying). */ 1091 avoidance (may already be dying). */
1092 if (newtime - ct->timeout.expires >= HZ) 1092 if (newtime - ct->timeout.expires >= HZ)
1093 mod_timer_pending(&ct->timeout, newtime); 1093 mod_timer_pending(&ct->timeout, newtime);
1094 } 1094 }
1095 1095
1096 acct: 1096 acct:
1097 if (do_acct) { 1097 if (do_acct) {
1098 struct nf_conn_counter *acct; 1098 struct nf_conn_counter *acct;
1099 1099
1100 acct = nf_conn_acct_find(ct); 1100 acct = nf_conn_acct_find(ct);
1101 if (acct) { 1101 if (acct) {
1102 atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); 1102 atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
1103 atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); 1103 atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);
1104 } 1104 }
1105 } 1105 }
1106 } 1106 }
1107 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 1107 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1108 1108
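To see where __nf_ct_refresh_acct() is typically driven from, here is a loose sketch of a simple L4 tracker's ->packet() handler: it refreshes the timer through the nf_ct_refresh_acct() wrapper (which calls the function above with do_acct == 1) and accepts the packet, much as the call from nf_conntrack_in() further up passes the per-flow timeouts array. The handler name and the timeouts index are illustrative, not taken from any existing tracker.

    #include <linux/skbuff.h>
    #include <linux/netfilter.h>
    #include <net/netfilter/nf_conntrack.h>

    /* Illustrative ->packet() handler for a stateless L4 protocol. */
    static int example_l4_packet(struct nf_conn *ct,
                                 const struct sk_buff *skb,
                                 unsigned int dataoff,
                                 enum ip_conntrack_info ctinfo,
                                 u_int8_t pf,
                                 unsigned int hooknum,
                                 unsigned int *timeouts)
    {
            /* Refresh the timeout and account the packet (do_acct == 1). */
            nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[0]);
            return NF_ACCEPT;
    }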
1109 bool __nf_ct_kill_acct(struct nf_conn *ct, 1109 bool __nf_ct_kill_acct(struct nf_conn *ct,
1110 enum ip_conntrack_info ctinfo, 1110 enum ip_conntrack_info ctinfo,
1111 const struct sk_buff *skb, 1111 const struct sk_buff *skb,
1112 int do_acct) 1112 int do_acct)
1113 { 1113 {
1114 if (do_acct) { 1114 if (do_acct) {
1115 struct nf_conn_counter *acct; 1115 struct nf_conn_counter *acct;
1116 1116
1117 acct = nf_conn_acct_find(ct); 1117 acct = nf_conn_acct_find(ct);
1118 if (acct) { 1118 if (acct) {
1119 atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); 1119 atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets);
1120 atomic64_add(skb->len - skb_network_offset(skb), 1120 atomic64_add(skb->len - skb_network_offset(skb),
1121 &acct[CTINFO2DIR(ctinfo)].bytes); 1121 &acct[CTINFO2DIR(ctinfo)].bytes);
1122 } 1122 }
1123 } 1123 }
1124 1124
1125 if (del_timer(&ct->timeout)) { 1125 if (del_timer(&ct->timeout)) {
1126 ct->timeout.function((unsigned long)ct); 1126 ct->timeout.function((unsigned long)ct);
1127 return true; 1127 return true;
1128 } 1128 }
1129 return false; 1129 return false;
1130 } 1130 }
1131 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); 1131 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
1132 1132
1133 #ifdef CONFIG_NF_CONNTRACK_ZONES 1133 #ifdef CONFIG_NF_CONNTRACK_ZONES
1134 static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { 1134 static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
1135 .len = sizeof(struct nf_conntrack_zone), 1135 .len = sizeof(struct nf_conntrack_zone),
1136 .align = __alignof__(struct nf_conntrack_zone), 1136 .align = __alignof__(struct nf_conntrack_zone),
1137 .id = NF_CT_EXT_ZONE, 1137 .id = NF_CT_EXT_ZONE,
1138 }; 1138 };
1139 #endif 1139 #endif
1140 1140
1141 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1141 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1142 1142
1143 #include <linux/netfilter/nfnetlink.h> 1143 #include <linux/netfilter/nfnetlink.h>
1144 #include <linux/netfilter/nfnetlink_conntrack.h> 1144 #include <linux/netfilter/nfnetlink_conntrack.h>
1145 #include <linux/mutex.h> 1145 #include <linux/mutex.h>
1146 1146
1147 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be 1147 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
1148 * in ip_conntrack_core, since we don't want the protocols to autoload 1148 * in ip_conntrack_core, since we don't want the protocols to autoload
1149 * or depend on ctnetlink */ 1149 * or depend on ctnetlink */
1150 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 1150 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
1151 const struct nf_conntrack_tuple *tuple) 1151 const struct nf_conntrack_tuple *tuple)
1152 { 1152 {
1153 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || 1153 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1154 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) 1154 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1155 goto nla_put_failure; 1155 goto nla_put_failure;
1156 return 0; 1156 return 0;
1157 1157
1158 nla_put_failure: 1158 nla_put_failure:
1159 return -1; 1159 return -1;
1160 } 1160 }
1161 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); 1161 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
1162 1162
1163 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { 1163 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1164 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, 1164 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
1165 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, 1165 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
1166 }; 1166 };
1167 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); 1167 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
1168 1168
1169 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], 1169 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1170 struct nf_conntrack_tuple *t) 1170 struct nf_conntrack_tuple *t)
1171 { 1171 {
1172 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 1172 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
1173 return -EINVAL; 1173 return -EINVAL;
1174 1174
1175 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); 1175 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1176 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); 1176 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
1177 1177
1178 return 0; 1178 return 0;
1179 } 1179 }
1180 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 1180 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1181 1181
1182 int nf_ct_port_nlattr_tuple_size(void) 1182 int nf_ct_port_nlattr_tuple_size(void)
1183 { 1183 {
1184 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1184 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1185 } 1185 }
1186 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); 1186 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1187 #endif 1187 #endif
1188 1188
1189 /* Used by ipt_REJECT and ip6t_REJECT. */ 1189 /* Used by ipt_REJECT and ip6t_REJECT. */
1190 static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 1190 static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
1191 { 1191 {
1192 struct nf_conn *ct; 1192 struct nf_conn *ct;
1193 enum ip_conntrack_info ctinfo; 1193 enum ip_conntrack_info ctinfo;
1194 1194
1195 /* This ICMP is in reverse direction to the packet which caused it */ 1195 /* This ICMP is in reverse direction to the packet which caused it */
1196 ct = nf_ct_get(skb, &ctinfo); 1196 ct = nf_ct_get(skb, &ctinfo);
1197 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 1197 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1198 ctinfo = IP_CT_RELATED_REPLY; 1198 ctinfo = IP_CT_RELATED_REPLY;
1199 else 1199 else
1200 ctinfo = IP_CT_RELATED; 1200 ctinfo = IP_CT_RELATED;
1201 1201
1202 /* Attach to new skbuff, and increment count */ 1202 /* Attach to new skbuff, and increment count */
1203 nskb->nfct = &ct->ct_general; 1203 nskb->nfct = &ct->ct_general;
1204 nskb->nfctinfo = ctinfo; 1204 nskb->nfctinfo = ctinfo;
1205 nf_conntrack_get(nskb->nfct); 1205 nf_conntrack_get(nskb->nfct);
1206 } 1206 }
1207 1207
1208 /* Bring out ya dead! */ 1208 /* Bring out ya dead! */
1209 static struct nf_conn * 1209 static struct nf_conn *
1210 get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), 1210 get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1211 void *data, unsigned int *bucket) 1211 void *data, unsigned int *bucket)
1212 { 1212 {
1213 struct nf_conntrack_tuple_hash *h; 1213 struct nf_conntrack_tuple_hash *h;
1214 struct nf_conn *ct; 1214 struct nf_conn *ct;
1215 struct hlist_nulls_node *n; 1215 struct hlist_nulls_node *n;
1216 1216
1217 spin_lock_bh(&nf_conntrack_lock); 1217 spin_lock_bh(&nf_conntrack_lock);
1218 for (; *bucket < net->ct.htable_size; (*bucket)++) { 1218 for (; *bucket < net->ct.htable_size; (*bucket)++) {
1219 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { 1219 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
1220 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1220 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1221 continue; 1221 continue;
1222 ct = nf_ct_tuplehash_to_ctrack(h); 1222 ct = nf_ct_tuplehash_to_ctrack(h);
1223 if (iter(ct, data)) 1223 if (iter(ct, data))
1224 goto found; 1224 goto found;
1225 } 1225 }
1226 } 1226 }
1227 hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) { 1227 hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
1228 ct = nf_ct_tuplehash_to_ctrack(h); 1228 ct = nf_ct_tuplehash_to_ctrack(h);
1229 if (iter(ct, data)) 1229 if (iter(ct, data))
1230 set_bit(IPS_DYING_BIT, &ct->status); 1230 set_bit(IPS_DYING_BIT, &ct->status);
1231 } 1231 }
1232 spin_unlock_bh(&nf_conntrack_lock); 1232 spin_unlock_bh(&nf_conntrack_lock);
1233 return NULL; 1233 return NULL;
1234 found: 1234 found:
1235 atomic_inc(&ct->ct_general.use); 1235 atomic_inc(&ct->ct_general.use);
1236 spin_unlock_bh(&nf_conntrack_lock); 1236 spin_unlock_bh(&nf_conntrack_lock);
1237 return ct; 1237 return ct;
1238 } 1238 }
1239 1239
1240 void nf_ct_iterate_cleanup(struct net *net, 1240 void nf_ct_iterate_cleanup(struct net *net,
1241 int (*iter)(struct nf_conn *i, void *data), 1241 int (*iter)(struct nf_conn *i, void *data),
1242 void *data) 1242 void *data)
1243 { 1243 {
1244 struct nf_conn *ct; 1244 struct nf_conn *ct;
1245 unsigned int bucket = 0; 1245 unsigned int bucket = 0;
1246 1246
1247 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1247 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1248 /* Time to push up daisies... */ 1248 /* Time to push up daisies... */
1249 if (del_timer(&ct->timeout)) 1249 if (del_timer(&ct->timeout))
1250 death_by_timeout((unsigned long)ct); 1250 death_by_timeout((unsigned long)ct);
1251 /* ... else the timer will get him soon. */ 1251 /* ... else the timer will get him soon. */
1252 1252
1253 nf_ct_put(ct); 1253 nf_ct_put(ct);
1254 } 1254 }
1255 } 1255 }
1256 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1256 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1257 1257
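As a usage sketch (not part of this change), a caller that wants to flush only part of the table can pass its own iterator to nf_ct_iterate_cleanup(), following the same pattern as kill_all() and kill_report() below; returning non-zero asks for the entry to be killed (see get_next_corpse() above). The function names and the IPv4-only filter here are hypothetical.

    #include <linux/socket.h>
    #include <net/netfilter/nf_conntrack.h>
    #include <net/netfilter/nf_conntrack_core.h>

    /* Hypothetical iterator: kill every IPv4 conntrack entry in this netns. */
    static int kill_ipv4_only(struct nf_conn *i, void *data)
    {
            return nf_ct_l3num(i) == AF_INET;
    }

    static void example_flush_ipv4(struct net *net)
    {
            nf_ct_iterate_cleanup(net, kill_ipv4_only, NULL);
    }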
1258 struct __nf_ct_flush_report { 1258 struct __nf_ct_flush_report {
1259 u32 pid; 1259 u32 pid;
1260 int report; 1260 int report;
1261 }; 1261 };
1262 1262
1263 static int kill_report(struct nf_conn *i, void *data) 1263 static int kill_report(struct nf_conn *i, void *data)
1264 { 1264 {
1265 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1265 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
1266 struct nf_conn_tstamp *tstamp; 1266 struct nf_conn_tstamp *tstamp;
1267 1267
1268 tstamp = nf_conn_tstamp_find(i); 1268 tstamp = nf_conn_tstamp_find(i);
1269 if (tstamp && tstamp->stop == 0) 1269 if (tstamp && tstamp->stop == 0)
1270 tstamp->stop = ktime_to_ns(ktime_get_real()); 1270 tstamp->stop = ktime_to_ns(ktime_get_real());
1271 1271
1272 /* If we fail to deliver the event, death_by_timeout() will retry */ 1272 /* If we fail to deliver the event, death_by_timeout() will retry */
1273 if (nf_conntrack_event_report(IPCT_DESTROY, i, 1273 if (nf_conntrack_event_report(IPCT_DESTROY, i,
1274 fr->pid, fr->report) < 0) 1274 fr->pid, fr->report) < 0)
1275 return 1; 1275 return 1;
1276 1276
1277 /* Avoid the delivery of the destroy event in death_by_timeout(). */ 1277 /* Avoid the delivery of the destroy event in death_by_timeout(). */
1278 set_bit(IPS_DYING_BIT, &i->status); 1278 set_bit(IPS_DYING_BIT, &i->status);
1279 return 1; 1279 return 1;
1280 } 1280 }
1281 1281
1282 static int kill_all(struct nf_conn *i, void *data) 1282 static int kill_all(struct nf_conn *i, void *data)
1283 { 1283 {
1284 return 1; 1284 return 1;
1285 } 1285 }
1286 1286
1287 void nf_ct_free_hashtable(void *hash, unsigned int size) 1287 void nf_ct_free_hashtable(void *hash, unsigned int size)
1288 { 1288 {
1289 if (is_vmalloc_addr(hash)) 1289 if (is_vmalloc_addr(hash))
1290 vfree(hash); 1290 vfree(hash);
1291 else 1291 else
1292 free_pages((unsigned long)hash, 1292 free_pages((unsigned long)hash,
1293 get_order(sizeof(struct hlist_head) * size)); 1293 get_order(sizeof(struct hlist_head) * size));
1294 } 1294 }
1295 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1295 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1296 1296
1297 void nf_conntrack_flush_report(struct net *net, u32 pid, int report) 1297 void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
1298 { 1298 {
1299 struct __nf_ct_flush_report fr = { 1299 struct __nf_ct_flush_report fr = {
1300 .pid = pid, 1300 .pid = pid,
1301 .report = report, 1301 .report = report,
1302 }; 1302 };
1303 nf_ct_iterate_cleanup(net, kill_report, &fr); 1303 nf_ct_iterate_cleanup(net, kill_report, &fr);
1304 } 1304 }
1305 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); 1305 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
1306 1306
1307 static void nf_ct_release_dying_list(struct net *net) 1307 static void nf_ct_release_dying_list(struct net *net)
1308 { 1308 {
1309 struct nf_conntrack_tuple_hash *h; 1309 struct nf_conntrack_tuple_hash *h;
1310 struct nf_conn *ct; 1310 struct nf_conn *ct;
1311 struct hlist_nulls_node *n; 1311 struct hlist_nulls_node *n;
1312 1312
1313 spin_lock_bh(&nf_conntrack_lock); 1313 spin_lock_bh(&nf_conntrack_lock);
1314 hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) { 1314 hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
1315 ct = nf_ct_tuplehash_to_ctrack(h); 1315 ct = nf_ct_tuplehash_to_ctrack(h);
1316 /* never fails to remove them, no listeners at this point */ 1316 /* never fails to remove them, no listeners at this point */
1317 nf_ct_kill(ct); 1317 nf_ct_kill(ct);
1318 } 1318 }
1319 spin_unlock_bh(&nf_conntrack_lock); 1319 spin_unlock_bh(&nf_conntrack_lock);
1320 } 1320 }
1321 1321
1322 static int untrack_refs(void) 1322 static int untrack_refs(void)
1323 { 1323 {
1324 int cnt = 0, cpu; 1324 int cnt = 0, cpu;
1325 1325
1326 for_each_possible_cpu(cpu) { 1326 for_each_possible_cpu(cpu) {
1327 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); 1327 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1328 1328
1329 cnt += atomic_read(&ct->ct_general.use) - 1; 1329 cnt += atomic_read(&ct->ct_general.use) - 1;
1330 } 1330 }
1331 return cnt; 1331 return cnt;
1332 } 1332 }
1333 1333
1334 static void nf_conntrack_cleanup_init_net(void) 1334 static void nf_conntrack_cleanup_init_net(void)
1335 { 1335 {
1336 while (untrack_refs() > 0) 1336 while (untrack_refs() > 0)
1337 schedule(); 1337 schedule();
1338 1338
1339 #ifdef CONFIG_NF_CONNTRACK_ZONES 1339 #ifdef CONFIG_NF_CONNTRACK_ZONES
1340 nf_ct_extend_unregister(&nf_ct_zone_extend); 1340 nf_ct_extend_unregister(&nf_ct_zone_extend);
1341 #endif 1341 #endif
1342 } 1342 }
1343 1343
1344 static void nf_conntrack_cleanup_net(struct net *net) 1344 static void nf_conntrack_cleanup_net(struct net *net)
1345 { 1345 {
1346 i_see_dead_people: 1346 i_see_dead_people:
1347 nf_ct_iterate_cleanup(net, kill_all, NULL); 1347 nf_ct_iterate_cleanup(net, kill_all, NULL);
1348 nf_ct_release_dying_list(net); 1348 nf_ct_release_dying_list(net);
1349 if (atomic_read(&net->ct.count) != 0) { 1349 if (atomic_read(&net->ct.count) != 0) {
1350 schedule(); 1350 schedule();
1351 goto i_see_dead_people; 1351 goto i_see_dead_people;
1352 } 1352 }
1353 1353
1354 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1354 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1355 nf_conntrack_helper_fini(net); 1355 nf_conntrack_helper_fini(net);
1356 nf_conntrack_timeout_fini(net); 1356 nf_conntrack_timeout_fini(net);
1357 nf_conntrack_ecache_fini(net); 1357 nf_conntrack_ecache_fini(net);
1358 nf_conntrack_tstamp_fini(net); 1358 nf_conntrack_tstamp_fini(net);
1359 nf_conntrack_acct_fini(net); 1359 nf_conntrack_acct_fini(net);
1360 nf_conntrack_expect_fini(net); 1360 nf_conntrack_expect_fini(net);
1361 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1361 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1362 kfree(net->ct.slabname); 1362 kfree(net->ct.slabname);
1363 free_percpu(net->ct.stat); 1363 free_percpu(net->ct.stat);
1364 } 1364 }
1365 1365
1366 /* Mishearing the voices in his head, our hero wonders how he's 1366 /* Mishearing the voices in his head, our hero wonders how he's
1367 supposed to kill the mall. */ 1367 supposed to kill the mall. */
1368 void nf_conntrack_cleanup(struct net *net) 1368 void nf_conntrack_cleanup(struct net *net)
1369 { 1369 {
1370 if (net_eq(net, &init_net)) 1370 if (net_eq(net, &init_net))
1371 RCU_INIT_POINTER(ip_ct_attach, NULL); 1371 RCU_INIT_POINTER(ip_ct_attach, NULL);
1372 1372
1373 /* This makes sure all current packets have passed through 1373 /* This makes sure all current packets have passed through
1374 netfilter framework. Roll on, two-stage module 1374 netfilter framework. Roll on, two-stage module
1375 delete... */ 1375 delete... */
1376 synchronize_net(); 1376 synchronize_net();
1377 nf_conntrack_proto_fini(net); 1377 nf_conntrack_proto_fini(net);
1378 nf_conntrack_cleanup_net(net); 1378 nf_conntrack_cleanup_net(net);
1379 }
1379 1380
1380 if (net_eq(net, &init_net)) { 1381 void nf_conntrack_cleanup_end(void)
1381 RCU_INIT_POINTER(nf_ct_destroy, NULL); 1382 {
1382 nf_conntrack_cleanup_init_net(); 1383 RCU_INIT_POINTER(nf_ct_destroy, NULL);
1383 } 1384 nf_conntrack_cleanup_init_net();
1384 } 1385 }
1385 1386
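The split above is the heart of the teardown-ordering fix: nf_conntrack_cleanup() is left to tear down per-namespace state, while clearing the global nf_ct_destroy hook moves into nf_conntrack_cleanup_end(), which is meant to run exactly once, after the last namespace has been cleaned up. A rough sketch of the intended module-exit ordering follows; the pernet-ops name is invented for illustration and is not the literal wiring in nf_conntrack_standalone.c.

    /* Illustrative module-exit ordering only. */
    static void example_conntrack_module_exit(void)
    {
            /* 1. Unregister the pernet ops; this runs the per-netns exit
             *    path (and thus nf_conntrack_cleanup()) for every remaining
             *    namespace, in no guaranteed order.                        */
            unregister_pernet_subsys(&example_conntrack_net_ops);

            /* 2. Only now is it safe to clear the global nf_ct_destroy
             *    hook: skbs freed during step 1 may still need it.        */
            nf_conntrack_cleanup_end();
    }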
1386 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) 1387 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1387 { 1388 {
1388 struct hlist_nulls_head *hash; 1389 struct hlist_nulls_head *hash;
1389 unsigned int nr_slots, i; 1390 unsigned int nr_slots, i;
1390 size_t sz; 1391 size_t sz;
1391 1392
1392 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1393 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1393 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 1394 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1394 sz = nr_slots * sizeof(struct hlist_nulls_head); 1395 sz = nr_slots * sizeof(struct hlist_nulls_head);
1395 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1396 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1396 get_order(sz)); 1397 get_order(sz));
1397 if (!hash) { 1398 if (!hash) {
1398 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1399 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1399 hash = vzalloc(sz); 1400 hash = vzalloc(sz);
1400 } 1401 }
1401 1402
1402 if (hash && nulls) 1403 if (hash && nulls)
1403 for (i = 0; i < nr_slots; i++) 1404 for (i = 0; i < nr_slots; i++)
1404 INIT_HLIST_NULLS_HEAD(&hash[i], i); 1405 INIT_HLIST_NULLS_HEAD(&hash[i], i);
1405 1406
1406 return hash; 1407 return hash;
1407 } 1408 }
1408 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); 1409 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
1409 1410
1410 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1411 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1411 { 1412 {
1412 int i, bucket, rc; 1413 int i, bucket, rc;
1413 unsigned int hashsize, old_size; 1414 unsigned int hashsize, old_size;
1414 struct hlist_nulls_head *hash, *old_hash; 1415 struct hlist_nulls_head *hash, *old_hash;
1415 struct nf_conntrack_tuple_hash *h; 1416 struct nf_conntrack_tuple_hash *h;
1416 struct nf_conn *ct; 1417 struct nf_conn *ct;
1417 1418
1418 if (current->nsproxy->net_ns != &init_net) 1419 if (current->nsproxy->net_ns != &init_net)
1419 return -EOPNOTSUPP; 1420 return -EOPNOTSUPP;
1420 1421
1421 /* On boot, we can set this without any fancy locking. */ 1422 /* On boot, we can set this without any fancy locking. */
1422 if (!nf_conntrack_htable_size) 1423 if (!nf_conntrack_htable_size)
1423 return param_set_uint(val, kp); 1424 return param_set_uint(val, kp);
1424 1425
1425 rc = kstrtouint(val, 0, &hashsize); 1426 rc = kstrtouint(val, 0, &hashsize);
1426 if (rc) 1427 if (rc)
1427 return rc; 1428 return rc;
1428 if (!hashsize) 1429 if (!hashsize)
1429 return -EINVAL; 1430 return -EINVAL;
1430 1431
1431 hash = nf_ct_alloc_hashtable(&hashsize, 1); 1432 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1432 if (!hash) 1433 if (!hash)
1433 return -ENOMEM; 1434 return -ENOMEM;
1434 1435
1435 /* Lookups in the old hash might happen in parallel, which means we 1436 /* Lookups in the old hash might happen in parallel, which means we
1436 * might get false negatives during connection lookup. New connections 1437 * might get false negatives during connection lookup. New connections
1437 * created because of a false negative won't make it into the hash 1438 * created because of a false negative won't make it into the hash
1438 * though since that required taking the lock. 1439 * though since that required taking the lock.
1439 */ 1440 */
1440 spin_lock_bh(&nf_conntrack_lock); 1441 spin_lock_bh(&nf_conntrack_lock);
1441 for (i = 0; i < init_net.ct.htable_size; i++) { 1442 for (i = 0; i < init_net.ct.htable_size; i++) {
1442 while (!hlist_nulls_empty(&init_net.ct.hash[i])) { 1443 while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
1443 h = hlist_nulls_entry(init_net.ct.hash[i].first, 1444 h = hlist_nulls_entry(init_net.ct.hash[i].first,
1444 struct nf_conntrack_tuple_hash, hnnode); 1445 struct nf_conntrack_tuple_hash, hnnode);
1445 ct = nf_ct_tuplehash_to_ctrack(h); 1446 ct = nf_ct_tuplehash_to_ctrack(h);
1446 hlist_nulls_del_rcu(&h->hnnode); 1447 hlist_nulls_del_rcu(&h->hnnode);
1447 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), 1448 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
1448 hashsize); 1449 hashsize);
1449 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1450 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1450 } 1451 }
1451 } 1452 }
1452 old_size = init_net.ct.htable_size; 1453 old_size = init_net.ct.htable_size;
1453 old_hash = init_net.ct.hash; 1454 old_hash = init_net.ct.hash;
1454 1455
1455 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1456 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
1456 init_net.ct.hash = hash; 1457 init_net.ct.hash = hash;
1457 spin_unlock_bh(&nf_conntrack_lock); 1458 spin_unlock_bh(&nf_conntrack_lock);
1458 1459
1459 nf_ct_free_hashtable(old_hash, old_size); 1460 nf_ct_free_hashtable(old_hash, old_size);
1460 return 0; 1461 return 0;
1461 } 1462 }
1462 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 1463 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1463 1464
1464 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 1465 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1465 &nf_conntrack_htable_size, 0600); 1466 &nf_conntrack_htable_size, 0600);
1466 1467
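Because the parameter is registered read-write (mode 0600) with nf_conntrack_set_hashsize() as its set handler, the hash table can be resized at runtime, typically by writing a new bucket count to /sys/module/nf_conntrack/parameters/hashsize as root. Per the -EOPNOTSUPP check above, only a process in the initial network namespace may trigger the resize, and only init_net's table is rehashed.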
1467 void nf_ct_untracked_status_or(unsigned long bits) 1468 void nf_ct_untracked_status_or(unsigned long bits)
1468 { 1469 {
1469 int cpu; 1470 int cpu;
1470 1471
1471 for_each_possible_cpu(cpu) 1472 for_each_possible_cpu(cpu)
1472 per_cpu(nf_conntrack_untracked, cpu).status |= bits; 1473 per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1473 } 1474 }
1474 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); 1475 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1475 1476
1476 static int nf_conntrack_init_init_net(void) 1477 static int nf_conntrack_init_init_net(void)
1477 { 1478 {
1478 int max_factor = 8; 1479 int max_factor = 8;
1479 int ret, cpu; 1480 int ret, cpu;
1480 1481
1481 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1482 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1482 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ 1483 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
1483 if (!nf_conntrack_htable_size) { 1484 if (!nf_conntrack_htable_size) {
1484 nf_conntrack_htable_size 1485 nf_conntrack_htable_size
1485 = (((totalram_pages << PAGE_SHIFT) / 16384) 1486 = (((totalram_pages << PAGE_SHIFT) / 16384)
1486 / sizeof(struct hlist_head)); 1487 / sizeof(struct hlist_head));
1487 if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1488 if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
1488 nf_conntrack_htable_size = 16384; 1489 nf_conntrack_htable_size = 16384;
1489 if (nf_conntrack_htable_size < 32) 1490 if (nf_conntrack_htable_size < 32)
1490 nf_conntrack_htable_size = 32; 1491 nf_conntrack_htable_size = 32;
1491 1492
1492 /* Use a max. factor of four by default to get the same max as 1493 /* Use a max. factor of four by default to get the same max as
1493 * with the old struct list_heads. When a table size is given 1494 * with the old struct list_heads. When a table size is given
1494 * we use the old value of 8 to avoid reducing the max. 1495 * we use the old value of 8 to avoid reducing the max.
1495 * entries. */ 1496 * entries. */
1496 max_factor = 4; 1497 max_factor = 4;
1497 } 1498 }
1498 nf_conntrack_max = max_factor * nf_conntrack_htable_size; 1499 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
1499 1500
1500 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", 1501 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
1501 NF_CONNTRACK_VERSION, nf_conntrack_htable_size, 1502 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1502 nf_conntrack_max); 1503 nf_conntrack_max);
1503 #ifdef CONFIG_NF_CONNTRACK_ZONES 1504 #ifdef CONFIG_NF_CONNTRACK_ZONES
1504 ret = nf_ct_extend_register(&nf_ct_zone_extend); 1505 ret = nf_ct_extend_register(&nf_ct_zone_extend);
1505 if (ret < 0) 1506 if (ret < 0)
1506 goto err_extend; 1507 goto err_extend;
1507 #endif 1508 #endif
1508 /* Set up fake conntrack: to never be deleted, not in any hashes */ 1509 /* Set up fake conntrack: to never be deleted, not in any hashes */
1509 for_each_possible_cpu(cpu) { 1510 for_each_possible_cpu(cpu) {
1510 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); 1511 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1511 write_pnet(&ct->ct_net, &init_net); 1512 write_pnet(&ct->ct_net, &init_net);
1512 atomic_set(&ct->ct_general.use, 1); 1513 atomic_set(&ct->ct_general.use, 1);
1513 } 1514 }
1514 /* - and make it look like a confirmed connection */ 1515 /* - and make it look like a confirmed connection */
1515 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); 1516 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1516 return 0; 1517 return 0;
1517 1518
1518 #ifdef CONFIG_NF_CONNTRACK_ZONES 1519 #ifdef CONFIG_NF_CONNTRACK_ZONES
1519 err_extend: 1520 err_extend:
1520 #endif 1521 #endif
1521 return ret; 1522 return ret;
1522 } 1523 }
1523 1524
1524 /* 1525 /*
1525 * We need to use special "null" values, not used in hash table 1526 * We need to use special "null" values, not used in hash table
1526 */ 1527 */
1527 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) 1528 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
1528 #define DYING_NULLS_VAL ((1<<30)+1) 1529 #define DYING_NULLS_VAL ((1<<30)+1)
1529 #define TEMPLATE_NULLS_VAL ((1<<30)+2) 1530 #define TEMPLATE_NULLS_VAL ((1<<30)+2)
1530 1531
1531 static int nf_conntrack_init_net(struct net *net) 1532 static int nf_conntrack_init_net(struct net *net)
1532 { 1533 {
1533 int ret; 1534 int ret;
1534 1535
1535 atomic_set(&net->ct.count, 0); 1536 atomic_set(&net->ct.count, 0);
1536 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); 1537 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
1537 INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); 1538 INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
1538 INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); 1539 INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
1539 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 1540 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1540 if (!net->ct.stat) { 1541 if (!net->ct.stat) {
1541 ret = -ENOMEM; 1542 ret = -ENOMEM;
1542 goto err_stat; 1543 goto err_stat;
1543 } 1544 }
1544 1545
1545 net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); 1546 net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
1546 if (!net->ct.slabname) { 1547 if (!net->ct.slabname) {
1547 ret = -ENOMEM; 1548 ret = -ENOMEM;
1548 goto err_slabname; 1549 goto err_slabname;
1549 } 1550 }
1550 1551
1551 net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, 1552 net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
1552 sizeof(struct nf_conn), 0, 1553 sizeof(struct nf_conn), 0,
1553 SLAB_DESTROY_BY_RCU, NULL); 1554 SLAB_DESTROY_BY_RCU, NULL);
1554 if (!net->ct.nf_conntrack_cachep) { 1555 if (!net->ct.nf_conntrack_cachep) {
1555 printk(KERN_ERR "Unable to create nf_conn slab cache\n"); 1556 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1556 ret = -ENOMEM; 1557 ret = -ENOMEM;
1557 goto err_cache; 1558 goto err_cache;
1558 } 1559 }
1559 1560
1560 net->ct.htable_size = nf_conntrack_htable_size; 1561 net->ct.htable_size = nf_conntrack_htable_size;
1561 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); 1562 net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
1562 if (!net->ct.hash) { 1563 if (!net->ct.hash) {
1563 ret = -ENOMEM; 1564 ret = -ENOMEM;
1564 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1565 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
1565 goto err_hash; 1566 goto err_hash;
1566 } 1567 }
1567 ret = nf_conntrack_expect_init(net); 1568 ret = nf_conntrack_expect_init(net);
1568 if (ret < 0) 1569 if (ret < 0)
1569 goto err_expect; 1570 goto err_expect;
1570 ret = nf_conntrack_acct_init(net); 1571 ret = nf_conntrack_acct_init(net);
1571 if (ret < 0) 1572 if (ret < 0)
1572 goto err_acct; 1573 goto err_acct;
1573 ret = nf_conntrack_tstamp_init(net); 1574 ret = nf_conntrack_tstamp_init(net);
1574 if (ret < 0) 1575 if (ret < 0)
1575 goto err_tstamp; 1576 goto err_tstamp;
1576 ret = nf_conntrack_ecache_init(net); 1577 ret = nf_conntrack_ecache_init(net);
1577 if (ret < 0) 1578 if (ret < 0)
1578 goto err_ecache; 1579 goto err_ecache;
1579 ret = nf_conntrack_timeout_init(net); 1580 ret = nf_conntrack_timeout_init(net);
1580 if (ret < 0) 1581 if (ret < 0)
1581 goto err_timeout; 1582 goto err_timeout;
1582 ret = nf_conntrack_helper_init(net); 1583 ret = nf_conntrack_helper_init(net);
1583 if (ret < 0) 1584 if (ret < 0)
1584 goto err_helper; 1585 goto err_helper;
1585 return 0; 1586 return 0;
1586 err_helper: 1587 err_helper:
1587 nf_conntrack_timeout_fini(net); 1588 nf_conntrack_timeout_fini(net);
1588 err_timeout: 1589 err_timeout:
1589 nf_conntrack_ecache_fini(net); 1590 nf_conntrack_ecache_fini(net);
1590 err_ecache: 1591 err_ecache:
1591 nf_conntrack_tstamp_fini(net); 1592 nf_conntrack_tstamp_fini(net);
1592 err_tstamp: 1593 err_tstamp:
1593 nf_conntrack_acct_fini(net); 1594 nf_conntrack_acct_fini(net);
1594 err_acct: 1595 err_acct:
1595 nf_conntrack_expect_fini(net); 1596 nf_conntrack_expect_fini(net);
1596 err_expect: 1597 err_expect:
1597 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1598 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1598 err_hash: 1599 err_hash:
1599 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1600 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1600 err_cache: 1601 err_cache:
1601 kfree(net->ct.slabname); 1602 kfree(net->ct.slabname);
1602 err_slabname: 1603 err_slabname:
1603 free_percpu(net->ct.stat); 1604 free_percpu(net->ct.stat);
1604 err_stat: 1605 err_stat:
1605 return ret; 1606 return ret;
1606 } 1607 }
1607 1608
1608 s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, 1609 s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
1609 enum ip_conntrack_dir dir, 1610 enum ip_conntrack_dir dir,
1610 u32 seq); 1611 u32 seq);
1611 EXPORT_SYMBOL_GPL(nf_ct_nat_offset); 1612 EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
1612 1613
1613 int nf_conntrack_init(struct net *net) 1614 int nf_conntrack_init(struct net *net)
1614 { 1615 {
1615 int ret; 1616 int ret;
1616 1617
1617 if (net_eq(net, &init_net)) { 1618 if (net_eq(net, &init_net)) {
1618 ret = nf_conntrack_init_init_net(); 1619 ret = nf_conntrack_init_init_net();
1619 if (ret < 0) 1620 if (ret < 0)
1620 goto out_init_net; 1621 goto out_init_net;
1621 } 1622 }
1622 ret = nf_conntrack_proto_init(net); 1623 ret = nf_conntrack_proto_init(net);
1623 if (ret < 0) 1624 if (ret < 0)
1624 goto out_proto; 1625 goto out_proto;
1625 ret = nf_conntrack_init_net(net); 1626 ret = nf_conntrack_init_net(net);
1626 if (ret < 0) 1627 if (ret < 0)
1627 goto out_net; 1628 goto out_net;
1628 1629
1629 if (net_eq(net, &init_net)) { 1630 if (net_eq(net, &init_net)) {
1630 /* For use by REJECT target */ 1631 /* For use by REJECT target */
1631 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); 1632 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1632 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); 1633 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1633 1634
1634 /* Howto get NAT offsets */ 1635 /* Howto get NAT offsets */
1635 RCU_INIT_POINTER(nf_ct_nat_offset, NULL); 1636 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
1636 } 1637 }
1637 return 0; 1638 return 0;
1638 1639
1639 out_net: 1640 out_net:
1640 nf_conntrack_proto_fini(net); 1641 nf_conntrack_proto_fini(net);
1641 out_proto: 1642 out_proto:
1642 if (net_eq(net, &init_net)) 1643 if (net_eq(net, &init_net))
1643 nf_conntrack_cleanup_init_net(); 1644 nf_conntrack_cleanup_init_net();
1644 out_init_net: 1645 out_init_net:
1645 return ret; 1646 return ret;
1646 } 1647 }
1647 1648
net/netfilter/nf_conntrack_standalone.c
1 /* (C) 1999-2001 Paul `Rusty' Russell 1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8 8
9 #include <linux/types.h> 9 #include <linux/types.h>
10 #include <linux/netfilter.h> 10 #include <linux/netfilter.h>
11 #include <linux/slab.h> 11 #include <linux/slab.h>
12 #include <linux/module.h> 12 #include <linux/module.h>
13 #include <linux/skbuff.h> 13 #include <linux/skbuff.h>
14 #include <linux/proc_fs.h> 14 #include <linux/proc_fs.h>
15 #include <linux/seq_file.h> 15 #include <linux/seq_file.h>
16 #include <linux/percpu.h> 16 #include <linux/percpu.h>
17 #include <linux/netdevice.h> 17 #include <linux/netdevice.h>
18 #include <linux/security.h> 18 #include <linux/security.h>
19 #include <net/net_namespace.h> 19 #include <net/net_namespace.h>
20 #ifdef CONFIG_SYSCTL 20 #ifdef CONFIG_SYSCTL
21 #include <linux/sysctl.h> 21 #include <linux/sysctl.h>
22 #endif 22 #endif
23 23
24 #include <net/netfilter/nf_conntrack.h> 24 #include <net/netfilter/nf_conntrack.h>
25 #include <net/netfilter/nf_conntrack_core.h> 25 #include <net/netfilter/nf_conntrack_core.h>
26 #include <net/netfilter/nf_conntrack_l3proto.h> 26 #include <net/netfilter/nf_conntrack_l3proto.h>
27 #include <net/netfilter/nf_conntrack_l4proto.h> 27 #include <net/netfilter/nf_conntrack_l4proto.h>
28 #include <net/netfilter/nf_conntrack_expect.h> 28 #include <net/netfilter/nf_conntrack_expect.h>
29 #include <net/netfilter/nf_conntrack_helper.h> 29 #include <net/netfilter/nf_conntrack_helper.h>
30 #include <net/netfilter/nf_conntrack_acct.h> 30 #include <net/netfilter/nf_conntrack_acct.h>
31 #include <net/netfilter/nf_conntrack_zones.h> 31 #include <net/netfilter/nf_conntrack_zones.h>
32 #include <net/netfilter/nf_conntrack_timestamp.h> 32 #include <net/netfilter/nf_conntrack_timestamp.h>
33 #include <linux/rculist_nulls.h> 33 #include <linux/rculist_nulls.h>
34 34
35 MODULE_LICENSE("GPL"); 35 MODULE_LICENSE("GPL");
36 36
37 #ifdef CONFIG_NF_CONNTRACK_PROCFS 37 #ifdef CONFIG_NF_CONNTRACK_PROCFS
38 int 38 int
39 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 39 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
40 const struct nf_conntrack_l3proto *l3proto, 40 const struct nf_conntrack_l3proto *l3proto,
41 const struct nf_conntrack_l4proto *l4proto) 41 const struct nf_conntrack_l4proto *l4proto)
42 { 42 {
43 return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple); 43 return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple);
44 } 44 }
45 EXPORT_SYMBOL_GPL(print_tuple); 45 EXPORT_SYMBOL_GPL(print_tuple);
46 46
47 struct ct_iter_state { 47 struct ct_iter_state {
48 struct seq_net_private p; 48 struct seq_net_private p;
49 unsigned int bucket; 49 unsigned int bucket;
50 u_int64_t time_now; 50 u_int64_t time_now;
51 }; 51 };
52 52
53 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 53 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
54 { 54 {
55 struct net *net = seq_file_net(seq); 55 struct net *net = seq_file_net(seq);
56 struct ct_iter_state *st = seq->private; 56 struct ct_iter_state *st = seq->private;
57 struct hlist_nulls_node *n; 57 struct hlist_nulls_node *n;
58 58
59 for (st->bucket = 0; 59 for (st->bucket = 0;
60 st->bucket < net->ct.htable_size; 60 st->bucket < net->ct.htable_size;
61 st->bucket++) { 61 st->bucket++) {
62 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 62 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
63 if (!is_a_nulls(n)) 63 if (!is_a_nulls(n))
64 return n; 64 return n;
65 } 65 }
66 return NULL; 66 return NULL;
67 } 67 }
68 68
69 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 69 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
70 struct hlist_nulls_node *head) 70 struct hlist_nulls_node *head)
71 { 71 {
72 struct net *net = seq_file_net(seq); 72 struct net *net = seq_file_net(seq);
73 struct ct_iter_state *st = seq->private; 73 struct ct_iter_state *st = seq->private;
74 74
75 head = rcu_dereference(hlist_nulls_next_rcu(head)); 75 head = rcu_dereference(hlist_nulls_next_rcu(head));
76 while (is_a_nulls(head)) { 76 while (is_a_nulls(head)) {
77 if (likely(get_nulls_value(head) == st->bucket)) { 77 if (likely(get_nulls_value(head) == st->bucket)) {
78 if (++st->bucket >= net->ct.htable_size) 78 if (++st->bucket >= net->ct.htable_size)
79 return NULL; 79 return NULL;
80 } 80 }
81 head = rcu_dereference( 81 head = rcu_dereference(
82 hlist_nulls_first_rcu( 82 hlist_nulls_first_rcu(
83 &net->ct.hash[st->bucket])); 83 &net->ct.hash[st->bucket]));
84 } 84 }
85 return head; 85 return head;
86 } 86 }
87 87
88 static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) 88 static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
89 { 89 {
90 struct hlist_nulls_node *head = ct_get_first(seq); 90 struct hlist_nulls_node *head = ct_get_first(seq);
91 91
92 if (head) 92 if (head)
93 while (pos && (head = ct_get_next(seq, head))) 93 while (pos && (head = ct_get_next(seq, head)))
94 pos--; 94 pos--;
95 return pos ? NULL : head; 95 return pos ? NULL : head;
96 } 96 }
97 97
98 static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 98 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
99 __acquires(RCU) 99 __acquires(RCU)
100 { 100 {
101 struct ct_iter_state *st = seq->private; 101 struct ct_iter_state *st = seq->private;
102 102
103 st->time_now = ktime_to_ns(ktime_get_real()); 103 st->time_now = ktime_to_ns(ktime_get_real());
104 rcu_read_lock(); 104 rcu_read_lock();
105 return ct_get_idx(seq, *pos); 105 return ct_get_idx(seq, *pos);
106 } 106 }
107 107
108 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) 108 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
109 { 109 {
110 (*pos)++; 110 (*pos)++;
111 return ct_get_next(s, v); 111 return ct_get_next(s, v);
112 } 112 }
113 113
114 static void ct_seq_stop(struct seq_file *s, void *v) 114 static void ct_seq_stop(struct seq_file *s, void *v)
115 __releases(RCU) 115 __releases(RCU)
116 { 116 {
117 rcu_read_unlock(); 117 rcu_read_unlock();
118 } 118 }
119 119
120 #ifdef CONFIG_NF_CONNTRACK_SECMARK 120 #ifdef CONFIG_NF_CONNTRACK_SECMARK
121 static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) 121 static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
122 { 122 {
123 int ret; 123 int ret;
124 u32 len; 124 u32 len;
125 char *secctx; 125 char *secctx;
126 126
127 ret = security_secid_to_secctx(ct->secmark, &secctx, &len); 127 ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
128 if (ret) 128 if (ret)
129 return 0; 129 return 0;
130 130
131 ret = seq_printf(s, "secctx=%s ", secctx); 131 ret = seq_printf(s, "secctx=%s ", secctx);
132 132
133 security_release_secctx(secctx, len); 133 security_release_secctx(secctx, len);
134 return ret; 134 return ret;
135 } 135 }
136 #else 136 #else
137 static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) 137 static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
138 { 138 {
139 return 0; 139 return 0;
140 } 140 }
141 #endif 141 #endif
142 142
143 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 143 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
144 static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 144 static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
145 { 145 {
146 struct ct_iter_state *st = s->private; 146 struct ct_iter_state *st = s->private;
147 struct nf_conn_tstamp *tstamp; 147 struct nf_conn_tstamp *tstamp;
148 s64 delta_time; 148 s64 delta_time;
149 149
150 tstamp = nf_conn_tstamp_find(ct); 150 tstamp = nf_conn_tstamp_find(ct);
151 if (tstamp) { 151 if (tstamp) {
152 delta_time = st->time_now - tstamp->start; 152 delta_time = st->time_now - tstamp->start;
153 if (delta_time > 0) 153 if (delta_time > 0)
154 delta_time = div_s64(delta_time, NSEC_PER_SEC); 154 delta_time = div_s64(delta_time, NSEC_PER_SEC);
155 else 155 else
156 delta_time = 0; 156 delta_time = 0;
157 157
158 return seq_printf(s, "delta-time=%llu ", 158 return seq_printf(s, "delta-time=%llu ",
159 (unsigned long long)delta_time); 159 (unsigned long long)delta_time);
160 } 160 }
161 return 0; 161 return 0;
162 } 162 }
163 #else 163 #else
164 static inline int 164 static inline int
165 ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 165 ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
166 { 166 {
167 return 0; 167 return 0;
168 } 168 }
169 #endif 169 #endif
170 170
171 /* return 0 on success, -ENOSPC if the seq_file buffer is full */ 171 /* return 0 on success, -ENOSPC if the seq_file buffer is full */
172 static int ct_seq_show(struct seq_file *s, void *v) 172 static int ct_seq_show(struct seq_file *s, void *v)
173 { 173 {
174 struct nf_conntrack_tuple_hash *hash = v; 174 struct nf_conntrack_tuple_hash *hash = v;
175 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 175 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
176 const struct nf_conntrack_l3proto *l3proto; 176 const struct nf_conntrack_l3proto *l3proto;
177 const struct nf_conntrack_l4proto *l4proto; 177 const struct nf_conntrack_l4proto *l4proto;
178 int ret = 0; 178 int ret = 0;
179 179
180 NF_CT_ASSERT(ct); 180 NF_CT_ASSERT(ct);
181 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 181 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
182 return 0; 182 return 0;
183 183
184 /* we only want to print DIR_ORIGINAL */ 184 /* we only want to print DIR_ORIGINAL */
185 if (NF_CT_DIRECTION(hash)) 185 if (NF_CT_DIRECTION(hash))
186 goto release; 186 goto release;
187 187
188 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 188 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
189 NF_CT_ASSERT(l3proto); 189 NF_CT_ASSERT(l3proto);
190 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 190 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
191 NF_CT_ASSERT(l4proto); 191 NF_CT_ASSERT(l4proto);
192 192
193 ret = -ENOSPC; 193 ret = -ENOSPC;
194 if (seq_printf(s, "%-8s %u %-8s %u %ld ", 194 if (seq_printf(s, "%-8s %u %-8s %u %ld ",
195 l3proto->name, nf_ct_l3num(ct), 195 l3proto->name, nf_ct_l3num(ct),
196 l4proto->name, nf_ct_protonum(ct), 196 l4proto->name, nf_ct_protonum(ct),
197 timer_pending(&ct->timeout) 197 timer_pending(&ct->timeout)
198 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 198 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
199 goto release; 199 goto release;
200 200
201 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) 201 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
202 goto release; 202 goto release;
203 203
204 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 204 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
205 l3proto, l4proto)) 205 l3proto, l4proto))
206 goto release; 206 goto release;
207 207
208 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) 208 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
209 goto release; 209 goto release;
210 210
211 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 211 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
212 if (seq_printf(s, "[UNREPLIED] ")) 212 if (seq_printf(s, "[UNREPLIED] "))
213 goto release; 213 goto release;
214 214
215 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 215 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
216 l3proto, l4proto)) 216 l3proto, l4proto))
217 goto release; 217 goto release;
218 218
219 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) 219 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
220 goto release; 220 goto release;
221 221
222 if (test_bit(IPS_ASSURED_BIT, &ct->status)) 222 if (test_bit(IPS_ASSURED_BIT, &ct->status))
223 if (seq_printf(s, "[ASSURED] ")) 223 if (seq_printf(s, "[ASSURED] "))
224 goto release; 224 goto release;
225 225
226 #if defined(CONFIG_NF_CONNTRACK_MARK) 226 #if defined(CONFIG_NF_CONNTRACK_MARK)
227 if (seq_printf(s, "mark=%u ", ct->mark)) 227 if (seq_printf(s, "mark=%u ", ct->mark))
228 goto release; 228 goto release;
229 #endif 229 #endif
230 230
231 if (ct_show_secctx(s, ct)) 231 if (ct_show_secctx(s, ct))
232 goto release; 232 goto release;
233 233
234 #ifdef CONFIG_NF_CONNTRACK_ZONES 234 #ifdef CONFIG_NF_CONNTRACK_ZONES
235 if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) 235 if (seq_printf(s, "zone=%u ", nf_ct_zone(ct)))
236 goto release; 236 goto release;
237 #endif 237 #endif
238 238
239 if (ct_show_delta_time(s, ct)) 239 if (ct_show_delta_time(s, ct))
240 goto release; 240 goto release;
241 241
242 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 242 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
243 goto release; 243 goto release;
244 244
245 ret = 0; 245 ret = 0;
246 release: 246 release:
247 nf_ct_put(ct); 247 nf_ct_put(ct);
248 return ret; 248 return ret;
249 } 249 }
250 250
251 static const struct seq_operations ct_seq_ops = { 251 static const struct seq_operations ct_seq_ops = {
252 .start = ct_seq_start, 252 .start = ct_seq_start,
253 .next = ct_seq_next, 253 .next = ct_seq_next,
254 .stop = ct_seq_stop, 254 .stop = ct_seq_stop,
255 .show = ct_seq_show 255 .show = ct_seq_show
256 }; 256 };
257 257
258 static int ct_open(struct inode *inode, struct file *file) 258 static int ct_open(struct inode *inode, struct file *file)
259 { 259 {
260 return seq_open_net(inode, file, &ct_seq_ops, 260 return seq_open_net(inode, file, &ct_seq_ops,
261 sizeof(struct ct_iter_state)); 261 sizeof(struct ct_iter_state));
262 } 262 }
263 263
264 static const struct file_operations ct_file_ops = { 264 static const struct file_operations ct_file_ops = {
265 .owner = THIS_MODULE, 265 .owner = THIS_MODULE,
266 .open = ct_open, 266 .open = ct_open,
267 .read = seq_read, 267 .read = seq_read,
268 .llseek = seq_lseek, 268 .llseek = seq_lseek,
269 .release = seq_release_net, 269 .release = seq_release_net,
270 }; 270 };
271 271
272 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) 272 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
273 { 273 {
274 struct net *net = seq_file_net(seq); 274 struct net *net = seq_file_net(seq);
275 int cpu; 275 int cpu;
276 276
277 if (*pos == 0) 277 if (*pos == 0)
278 return SEQ_START_TOKEN; 278 return SEQ_START_TOKEN;
279 279
280 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { 280 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
281 if (!cpu_possible(cpu)) 281 if (!cpu_possible(cpu))
282 continue; 282 continue;
283 *pos = cpu + 1; 283 *pos = cpu + 1;
284 return per_cpu_ptr(net->ct.stat, cpu); 284 return per_cpu_ptr(net->ct.stat, cpu);
285 } 285 }
286 286
287 return NULL; 287 return NULL;
288 } 288 }
289 289
290 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 290 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
291 { 291 {
292 struct net *net = seq_file_net(seq); 292 struct net *net = seq_file_net(seq);
293 int cpu; 293 int cpu;
294 294
295 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 295 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
296 if (!cpu_possible(cpu)) 296 if (!cpu_possible(cpu))
297 continue; 297 continue;
298 *pos = cpu + 1; 298 *pos = cpu + 1;
299 return per_cpu_ptr(net->ct.stat, cpu); 299 return per_cpu_ptr(net->ct.stat, cpu);
300 } 300 }
301 301
302 return NULL; 302 return NULL;
303 } 303 }
304 304
305 static void ct_cpu_seq_stop(struct seq_file *seq, void *v) 305 static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
306 { 306 {
307 } 307 }
308 308
309 static int ct_cpu_seq_show(struct seq_file *seq, void *v) 309 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
310 { 310 {
311 struct net *net = seq_file_net(seq); 311 struct net *net = seq_file_net(seq);
312 unsigned int nr_conntracks = atomic_read(&net->ct.count); 312 unsigned int nr_conntracks = atomic_read(&net->ct.count);
313 const struct ip_conntrack_stat *st = v; 313 const struct ip_conntrack_stat *st = v;
314 314
315 if (v == SEQ_START_TOKEN) { 315 if (v == SEQ_START_TOKEN) {
316 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); 316 seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
317 return 0; 317 return 0;
318 } 318 }
319 319
320 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " 320 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
321 "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 321 "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
322 nr_conntracks, 322 nr_conntracks,
323 st->searched, 323 st->searched,
324 st->found, 324 st->found,
325 st->new, 325 st->new,
326 st->invalid, 326 st->invalid,
327 st->ignore, 327 st->ignore,
328 st->delete, 328 st->delete,
329 st->delete_list, 329 st->delete_list,
330 st->insert, 330 st->insert,
331 st->insert_failed, 331 st->insert_failed,
332 st->drop, 332 st->drop,
333 st->early_drop, 333 st->early_drop,
334 st->error, 334 st->error,
335 335
336 st->expect_new, 336 st->expect_new,
337 st->expect_create, 337 st->expect_create,
338 st->expect_delete, 338 st->expect_delete,
339 st->search_restart 339 st->search_restart
340 ); 340 );
341 return 0; 341 return 0;
342 } 342 }
343 343
344 static const struct seq_operations ct_cpu_seq_ops = { 344 static const struct seq_operations ct_cpu_seq_ops = {
345 .start = ct_cpu_seq_start, 345 .start = ct_cpu_seq_start,
346 .next = ct_cpu_seq_next, 346 .next = ct_cpu_seq_next,
347 .stop = ct_cpu_seq_stop, 347 .stop = ct_cpu_seq_stop,
348 .show = ct_cpu_seq_show, 348 .show = ct_cpu_seq_show,
349 }; 349 };
350 350
351 static int ct_cpu_seq_open(struct inode *inode, struct file *file) 351 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
352 { 352 {
353 return seq_open_net(inode, file, &ct_cpu_seq_ops, 353 return seq_open_net(inode, file, &ct_cpu_seq_ops,
354 sizeof(struct seq_net_private)); 354 sizeof(struct seq_net_private));
355 } 355 }
356 356
357 static const struct file_operations ct_cpu_seq_fops = { 357 static const struct file_operations ct_cpu_seq_fops = {
358 .owner = THIS_MODULE, 358 .owner = THIS_MODULE,
359 .open = ct_cpu_seq_open, 359 .open = ct_cpu_seq_open,
360 .read = seq_read, 360 .read = seq_read,
361 .llseek = seq_lseek, 361 .llseek = seq_lseek,
362 .release = seq_release_net, 362 .release = seq_release_net,
363 }; 363 };
364 364
365 static int nf_conntrack_standalone_init_proc(struct net *net) 365 static int nf_conntrack_standalone_init_proc(struct net *net)
366 { 366 {
367 struct proc_dir_entry *pde; 367 struct proc_dir_entry *pde;
368 368
369 pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); 369 pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops);
370 if (!pde) 370 if (!pde)
371 goto out_nf_conntrack; 371 goto out_nf_conntrack;
372 372
373 pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, 373 pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
374 &ct_cpu_seq_fops); 374 &ct_cpu_seq_fops);
375 if (!pde) 375 if (!pde)
376 goto out_stat_nf_conntrack; 376 goto out_stat_nf_conntrack;
377 return 0; 377 return 0;
378 378
379 out_stat_nf_conntrack: 379 out_stat_nf_conntrack:
380 proc_net_remove(net, "nf_conntrack"); 380 proc_net_remove(net, "nf_conntrack");
381 out_nf_conntrack: 381 out_nf_conntrack:
382 return -ENOMEM; 382 return -ENOMEM;
383 } 383 }
384 384
385 static void nf_conntrack_standalone_fini_proc(struct net *net) 385 static void nf_conntrack_standalone_fini_proc(struct net *net)
386 { 386 {
387 remove_proc_entry("nf_conntrack", net->proc_net_stat); 387 remove_proc_entry("nf_conntrack", net->proc_net_stat);
388 proc_net_remove(net, "nf_conntrack"); 388 proc_net_remove(net, "nf_conntrack");
389 } 389 }
390 #else 390 #else
391 static int nf_conntrack_standalone_init_proc(struct net *net) 391 static int nf_conntrack_standalone_init_proc(struct net *net)
392 { 392 {
393 return 0; 393 return 0;
394 } 394 }
395 395
396 static void nf_conntrack_standalone_fini_proc(struct net *net) 396 static void nf_conntrack_standalone_fini_proc(struct net *net)
397 { 397 {
398 } 398 }
399 #endif /* CONFIG_NF_CONNTRACK_PROCFS */ 399 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
400 400
401 /* Sysctl support */ 401 /* Sysctl support */
402 402
403 #ifdef CONFIG_SYSCTL 403 #ifdef CONFIG_SYSCTL
404 /* Log invalid packets of a given protocol */ 404 /* Log invalid packets of a given protocol */
405 static int log_invalid_proto_min = 0; 405 static int log_invalid_proto_min = 0;
406 static int log_invalid_proto_max = 255; 406 static int log_invalid_proto_max = 255;
407 407
408 static struct ctl_table_header *nf_ct_netfilter_header; 408 static struct ctl_table_header *nf_ct_netfilter_header;
409 409
410 static ctl_table nf_ct_sysctl_table[] = { 410 static ctl_table nf_ct_sysctl_table[] = {
411 { 411 {
412 .procname = "nf_conntrack_max", 412 .procname = "nf_conntrack_max",
413 .data = &nf_conntrack_max, 413 .data = &nf_conntrack_max,
414 .maxlen = sizeof(int), 414 .maxlen = sizeof(int),
415 .mode = 0644, 415 .mode = 0644,
416 .proc_handler = proc_dointvec, 416 .proc_handler = proc_dointvec,
417 }, 417 },
418 { 418 {
419 .procname = "nf_conntrack_count", 419 .procname = "nf_conntrack_count",
420 .data = &init_net.ct.count, 420 .data = &init_net.ct.count,
421 .maxlen = sizeof(int), 421 .maxlen = sizeof(int),
422 .mode = 0444, 422 .mode = 0444,
423 .proc_handler = proc_dointvec, 423 .proc_handler = proc_dointvec,
424 }, 424 },
425 { 425 {
426 .procname = "nf_conntrack_buckets", 426 .procname = "nf_conntrack_buckets",
427 .data = &init_net.ct.htable_size, 427 .data = &init_net.ct.htable_size,
428 .maxlen = sizeof(unsigned int), 428 .maxlen = sizeof(unsigned int),
429 .mode = 0444, 429 .mode = 0444,
430 .proc_handler = proc_dointvec, 430 .proc_handler = proc_dointvec,
431 }, 431 },
432 { 432 {
433 .procname = "nf_conntrack_checksum", 433 .procname = "nf_conntrack_checksum",
434 .data = &init_net.ct.sysctl_checksum, 434 .data = &init_net.ct.sysctl_checksum,
435 .maxlen = sizeof(unsigned int), 435 .maxlen = sizeof(unsigned int),
436 .mode = 0644, 436 .mode = 0644,
437 .proc_handler = proc_dointvec, 437 .proc_handler = proc_dointvec,
438 }, 438 },
439 { 439 {
440 .procname = "nf_conntrack_log_invalid", 440 .procname = "nf_conntrack_log_invalid",
441 .data = &init_net.ct.sysctl_log_invalid, 441 .data = &init_net.ct.sysctl_log_invalid,
442 .maxlen = sizeof(unsigned int), 442 .maxlen = sizeof(unsigned int),
443 .mode = 0644, 443 .mode = 0644,
444 .proc_handler = proc_dointvec_minmax, 444 .proc_handler = proc_dointvec_minmax,
445 .extra1 = &log_invalid_proto_min, 445 .extra1 = &log_invalid_proto_min,
446 .extra2 = &log_invalid_proto_max, 446 .extra2 = &log_invalid_proto_max,
447 }, 447 },
448 { 448 {
449 .procname = "nf_conntrack_expect_max", 449 .procname = "nf_conntrack_expect_max",
450 .data = &nf_ct_expect_max, 450 .data = &nf_ct_expect_max,
451 .maxlen = sizeof(int), 451 .maxlen = sizeof(int),
452 .mode = 0644, 452 .mode = 0644,
453 .proc_handler = proc_dointvec, 453 .proc_handler = proc_dointvec,
454 }, 454 },
455 { } 455 { }
456 }; 456 };
457 457
458 #define NET_NF_CONNTRACK_MAX 2089 458 #define NET_NF_CONNTRACK_MAX 2089
459 459
460 static ctl_table nf_ct_netfilter_table[] = { 460 static ctl_table nf_ct_netfilter_table[] = {
461 { 461 {
462 .procname = "nf_conntrack_max", 462 .procname = "nf_conntrack_max",
463 .data = &nf_conntrack_max, 463 .data = &nf_conntrack_max,
464 .maxlen = sizeof(int), 464 .maxlen = sizeof(int),
465 .mode = 0644, 465 .mode = 0644,
466 .proc_handler = proc_dointvec, 466 .proc_handler = proc_dointvec,
467 }, 467 },
468 { } 468 { }
469 }; 469 };
470 470
471 static int nf_conntrack_standalone_init_sysctl(struct net *net) 471 static int nf_conntrack_standalone_init_sysctl(struct net *net)
472 { 472 {
473 struct ctl_table *table; 473 struct ctl_table *table;
474 474
475 if (net_eq(net, &init_net)) { 475 if (net_eq(net, &init_net)) {
476 nf_ct_netfilter_header = 476 nf_ct_netfilter_header =
477 register_net_sysctl(&init_net, "net", nf_ct_netfilter_table); 477 register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
478 if (!nf_ct_netfilter_header) 478 if (!nf_ct_netfilter_header)
479 goto out; 479 goto out;
480 } 480 }
481 481
482 table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), 482 table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
483 GFP_KERNEL); 483 GFP_KERNEL);
484 if (!table) 484 if (!table)
485 goto out_kmemdup; 485 goto out_kmemdup;
486 486
487 table[1].data = &net->ct.count; 487 table[1].data = &net->ct.count;
488 table[2].data = &net->ct.htable_size; 488 table[2].data = &net->ct.htable_size;
489 table[3].data = &net->ct.sysctl_checksum; 489 table[3].data = &net->ct.sysctl_checksum;
490 table[4].data = &net->ct.sysctl_log_invalid; 490 table[4].data = &net->ct.sysctl_log_invalid;
491 491
492 /* Don't export sysctls to unprivileged users */ 492 /* Don't export sysctls to unprivileged users */
493 if (net->user_ns != &init_user_ns) 493 if (net->user_ns != &init_user_ns)
494 table[0].procname = NULL; 494 table[0].procname = NULL;
495 495
496 net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); 496 net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
497 if (!net->ct.sysctl_header) 497 if (!net->ct.sysctl_header)
498 goto out_unregister_netfilter; 498 goto out_unregister_netfilter;
499 499
500 return 0; 500 return 0;
501 501
502 out_unregister_netfilter: 502 out_unregister_netfilter:
503 kfree(table); 503 kfree(table);
504 out_kmemdup: 504 out_kmemdup:
505 if (net_eq(net, &init_net)) 505 if (net_eq(net, &init_net))
506 unregister_net_sysctl_table(nf_ct_netfilter_header); 506 unregister_net_sysctl_table(nf_ct_netfilter_header);
507 out: 507 out:
508 printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n"); 508 printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n");
509 return -ENOMEM; 509 return -ENOMEM;
510 } 510 }
511 511
512 static void nf_conntrack_standalone_fini_sysctl(struct net *net) 512 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
513 { 513 {
514 struct ctl_table *table; 514 struct ctl_table *table;
515 515
516 if (net_eq(net, &init_net)) 516 if (net_eq(net, &init_net))
517 unregister_net_sysctl_table(nf_ct_netfilter_header); 517 unregister_net_sysctl_table(nf_ct_netfilter_header);
518 table = net->ct.sysctl_header->ctl_table_arg; 518 table = net->ct.sysctl_header->ctl_table_arg;
519 unregister_net_sysctl_table(net->ct.sysctl_header); 519 unregister_net_sysctl_table(net->ct.sysctl_header);
520 kfree(table); 520 kfree(table);
521 } 521 }
522 #else 522 #else
523 static int nf_conntrack_standalone_init_sysctl(struct net *net) 523 static int nf_conntrack_standalone_init_sysctl(struct net *net)
524 { 524 {
525 return 0; 525 return 0;
526 } 526 }
527 527
528 static void nf_conntrack_standalone_fini_sysctl(struct net *net) 528 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
529 { 529 {
530 } 530 }
531 #endif /* CONFIG_SYSCTL */ 531 #endif /* CONFIG_SYSCTL */
532 532
533 static int nf_conntrack_net_init(struct net *net) 533 static int nf_conntrack_net_init(struct net *net)
534 { 534 {
535 int ret; 535 int ret;
536 536
537 ret = nf_conntrack_init(net); 537 ret = nf_conntrack_init(net);
538 if (ret < 0) 538 if (ret < 0)
539 goto out_init; 539 goto out_init;
540 ret = nf_conntrack_standalone_init_proc(net); 540 ret = nf_conntrack_standalone_init_proc(net);
541 if (ret < 0) 541 if (ret < 0)
542 goto out_proc; 542 goto out_proc;
543 net->ct.sysctl_checksum = 1; 543 net->ct.sysctl_checksum = 1;
544 net->ct.sysctl_log_invalid = 0; 544 net->ct.sysctl_log_invalid = 0;
545 ret = nf_conntrack_standalone_init_sysctl(net); 545 ret = nf_conntrack_standalone_init_sysctl(net);
546 if (ret < 0) 546 if (ret < 0)
547 goto out_sysctl; 547 goto out_sysctl;
548 return 0; 548 return 0;
549 549
550 out_sysctl: 550 out_sysctl:
551 nf_conntrack_standalone_fini_proc(net); 551 nf_conntrack_standalone_fini_proc(net);
552 out_proc: 552 out_proc:
553 nf_conntrack_cleanup(net); 553 nf_conntrack_cleanup(net);
554 out_init: 554 out_init:
555 return ret; 555 return ret;
556 } 556 }
557 557
558 static void nf_conntrack_net_exit(struct net *net) 558 static void nf_conntrack_net_exit(struct net *net)
559 { 559 {
560 nf_conntrack_standalone_fini_sysctl(net); 560 nf_conntrack_standalone_fini_sysctl(net);
561 nf_conntrack_standalone_fini_proc(net); 561 nf_conntrack_standalone_fini_proc(net);
562 nf_conntrack_cleanup(net); 562 nf_conntrack_cleanup(net);
563 } 563 }
564 564
565 static struct pernet_operations nf_conntrack_net_ops = { 565 static struct pernet_operations nf_conntrack_net_ops = {
566 .init = nf_conntrack_net_init, 566 .init = nf_conntrack_net_init,
567 .exit = nf_conntrack_net_exit, 567 .exit = nf_conntrack_net_exit,
568 }; 568 };
569 569
570 static int __init nf_conntrack_standalone_init(void) 570 static int __init nf_conntrack_standalone_init(void)
571 { 571 {
572 return register_pernet_subsys(&nf_conntrack_net_ops); 572 return register_pernet_subsys(&nf_conntrack_net_ops);
573 } 573 }
574 574
575 static void __exit nf_conntrack_standalone_fini(void) 575 static void __exit nf_conntrack_standalone_fini(void)
576 { 576 {
577 unregister_pernet_subsys(&nf_conntrack_net_ops); 577 unregister_pernet_subsys(&nf_conntrack_net_ops);
578 nf_conntrack_cleanup_end();
578 } 579 }
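[Editor's note] The only change in this file is the new call above, placed after unregister_pernet_subsys() returns, i.e. once every remaining network namespace has finished its exit handler. The helper itself is defined in nf_conntrack_core.c (see that hunk of this commit); the snippet below is a minimal sketch, not the committed code, written under the assumption that the helper performs the one-time, module-wide teardown that must not run from any single netns exit path.

/* Sketch only: assumed shape of the core-side helper declared in
 * nf_conntrack_core.h; the authoritative body is in the
 * nf_conntrack_core.c hunk of this commit. */
void nf_conntrack_cleanup_end(void)
{
	/* Clear the global destroy hook exactly once, at module unload,
	 * after all per-netns exit handlers have completed. */
	RCU_INIT_POINTER(nf_ct_destroy, NULL);

	/* ...followed by whatever module-wide teardown the core file
	 * still performs (assumed, not reproduced here). */
}

With this ordering, by the time the hook is cleared no namespace can still be tearing down conntrack state, which is why the call sits after unregister_pernet_subsys() rather than inside the pernet exit callback.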
579 580
580 module_init(nf_conntrack_standalone_init); 581 module_init(nf_conntrack_standalone_init);
581 module_exit(nf_conntrack_standalone_fini); 582 module_exit(nf_conntrack_standalone_fini);
582 583
583 /* Some modules need us, but don't depend directly on any symbol. 584 /* Some modules need us, but don't depend directly on any symbol.
584 They should call this. */ 585 They should call this. */
585 void need_conntrack(void) 586 void need_conntrack(void)
586 { 587 {
587 } 588 }
588 EXPORT_SYMBOL_GPL(need_conntrack); 589 EXPORT_SYMBOL_GPL(need_conntrack);
589 590