Commit 76eb946040a7b4c797979a9c22464b9a07890ba5

Authored by Patrick McHardy
Committed by David S. Miller
1 parent ffaa9c100b

[NETFILTER]: nf_conntrack: don't inline early_drop()

early_drop() is only called *very* rarely; unfortunately, gcc inlines it
into the hot path because there is only a single caller. Explicitly mark
it noinline.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
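
For readers who want to see the effect in isolation, here is a minimal, self-contained C sketch of the pattern the patch relies on: a rarely-taken slow path explicitly marked noinline so gcc keeps it out of its single caller's hot path, with the triggering condition wrapped in unlikely(). The noinline and unlikely macros below merely stand in for the kernel's versions from <linux/compiler.h>; the function names, the table limit and the eviction behaviour are made up for illustration and are not the kernel code itself.

/*
 * Sketch only: a single-caller slow path kept out of line.
 * "noinline" and "unlikely" stand in for the kernel macros.
 */
#include <stdio.h>

#define noinline __attribute__((noinline))
#define unlikely(x) __builtin_expect(!!(x), 0)

#define TABLE_MAX 4		/* illustrative limit */

static int entry_count;

/* Only runs when the table is full; marking it noinline keeps the
 * compiler from folding this cold code into alloc_entry()'s hot path. */
static noinline int early_drop_sketch(void)
{
        /* Pretend we evicted one old entry. */
        entry_count--;
        return 1;
}

static int alloc_entry(void)
{
        entry_count++;
        if (unlikely(entry_count > TABLE_MAX)) {
                if (!early_drop_sketch()) {
                        entry_count--;
                        return -1;	/* table full, drop */
                }
        }
        return 0;
}

int main(void)
{
        for (int i = 0; i < 6; i++) {
                int ret = alloc_entry();
                printf("alloc %d -> %d (count=%d)\n", i, ret, entry_count);
        }
        return 0;
}

The structure mirrors the hunk below: the fast path stays small, and the eviction logic is compiled as a separate, rarely-called function.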

Showing 1 changed file with 3 additions and 3 deletions

net/netfilter/nf_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from, 1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables 2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */ 3 extension. */
4 4
5 /* (C) 1999-2001 Paul `Rusty' Russell 5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
12 */ 12 */
13 13
14 #include <linux/types.h> 14 #include <linux/types.h>
15 #include <linux/netfilter.h> 15 #include <linux/netfilter.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/skbuff.h> 17 #include <linux/skbuff.h>
18 #include <linux/proc_fs.h> 18 #include <linux/proc_fs.h>
19 #include <linux/vmalloc.h> 19 #include <linux/vmalloc.h>
20 #include <linux/stddef.h> 20 #include <linux/stddef.h>
21 #include <linux/slab.h> 21 #include <linux/slab.h>
22 #include <linux/random.h> 22 #include <linux/random.h>
23 #include <linux/jhash.h> 23 #include <linux/jhash.h>
24 #include <linux/err.h> 24 #include <linux/err.h>
25 #include <linux/percpu.h> 25 #include <linux/percpu.h>
26 #include <linux/moduleparam.h> 26 #include <linux/moduleparam.h>
27 #include <linux/notifier.h> 27 #include <linux/notifier.h>
28 #include <linux/kernel.h> 28 #include <linux/kernel.h>
29 #include <linux/netdevice.h> 29 #include <linux/netdevice.h>
30 #include <linux/socket.h> 30 #include <linux/socket.h>
31 #include <linux/mm.h> 31 #include <linux/mm.h>
32 32
33 #include <net/netfilter/nf_conntrack.h> 33 #include <net/netfilter/nf_conntrack.h>
34 #include <net/netfilter/nf_conntrack_l3proto.h> 34 #include <net/netfilter/nf_conntrack_l3proto.h>
35 #include <net/netfilter/nf_conntrack_l4proto.h> 35 #include <net/netfilter/nf_conntrack_l4proto.h>
36 #include <net/netfilter/nf_conntrack_expect.h> 36 #include <net/netfilter/nf_conntrack_expect.h>
37 #include <net/netfilter/nf_conntrack_helper.h> 37 #include <net/netfilter/nf_conntrack_helper.h>
38 #include <net/netfilter/nf_conntrack_core.h> 38 #include <net/netfilter/nf_conntrack_core.h>
39 #include <net/netfilter/nf_conntrack_extend.h> 39 #include <net/netfilter/nf_conntrack_extend.h>
40 40
41 #define NF_CONNTRACK_VERSION "0.5.0" 41 #define NF_CONNTRACK_VERSION "0.5.0"
42 42
43 DEFINE_SPINLOCK(nf_conntrack_lock); 43 DEFINE_SPINLOCK(nf_conntrack_lock);
44 EXPORT_SYMBOL_GPL(nf_conntrack_lock); 44 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
45 45
46 /* nf_conntrack_standalone needs this */ 46 /* nf_conntrack_standalone needs this */
47 atomic_t nf_conntrack_count = ATOMIC_INIT(0); 47 atomic_t nf_conntrack_count = ATOMIC_INIT(0);
48 EXPORT_SYMBOL_GPL(nf_conntrack_count); 48 EXPORT_SYMBOL_GPL(nf_conntrack_count);
49 49
50 unsigned int nf_conntrack_htable_size __read_mostly; 50 unsigned int nf_conntrack_htable_size __read_mostly;
51 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); 51 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
52 52
53 int nf_conntrack_max __read_mostly; 53 int nf_conntrack_max __read_mostly;
54 EXPORT_SYMBOL_GPL(nf_conntrack_max); 54 EXPORT_SYMBOL_GPL(nf_conntrack_max);
55 55
56 struct hlist_head *nf_conntrack_hash __read_mostly; 56 struct hlist_head *nf_conntrack_hash __read_mostly;
57 EXPORT_SYMBOL_GPL(nf_conntrack_hash); 57 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
58 58
59 struct nf_conn nf_conntrack_untracked __read_mostly; 59 struct nf_conn nf_conntrack_untracked __read_mostly;
60 EXPORT_SYMBOL_GPL(nf_conntrack_untracked); 60 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
61 61
62 unsigned int nf_ct_log_invalid __read_mostly; 62 unsigned int nf_ct_log_invalid __read_mostly;
63 HLIST_HEAD(unconfirmed); 63 HLIST_HEAD(unconfirmed);
64 static int nf_conntrack_vmalloc __read_mostly; 64 static int nf_conntrack_vmalloc __read_mostly;
65 static struct kmem_cache *nf_conntrack_cachep __read_mostly; 65 static struct kmem_cache *nf_conntrack_cachep __read_mostly;
66 66
67 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); 67 DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
68 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); 68 EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
69 69
70 static int nf_conntrack_hash_rnd_initted; 70 static int nf_conntrack_hash_rnd_initted;
71 static unsigned int nf_conntrack_hash_rnd; 71 static unsigned int nf_conntrack_hash_rnd;
72 72
73 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 73 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
74 unsigned int size, unsigned int rnd) 74 unsigned int size, unsigned int rnd)
75 { 75 {
76 unsigned int n; 76 unsigned int n;
77 u_int32_t h; 77 u_int32_t h;
78 78
79 /* The direction must be ignored, so we hash everything up to the 79 /* The direction must be ignored, so we hash everything up to the
80 * destination ports (which is a multiple of 4) and treat the last 80 * destination ports (which is a multiple of 4) and treat the last
81 * three bytes manually. 81 * three bytes manually.
82 */ 82 */
83 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 83 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
84 h = jhash2((u32 *)tuple, n, 84 h = jhash2((u32 *)tuple, n,
85 rnd ^ (((__force __u16)tuple->dst.u.all << 16) | 85 rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
86 tuple->dst.protonum)); 86 tuple->dst.protonum));
87 87
88 return ((u64)h * size) >> 32; 88 return ((u64)h * size) >> 32;
89 } 89 }
90 90
91 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) 91 static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
92 { 92 {
93 return __hash_conntrack(tuple, nf_conntrack_htable_size, 93 return __hash_conntrack(tuple, nf_conntrack_htable_size,
94 nf_conntrack_hash_rnd); 94 nf_conntrack_hash_rnd);
95 } 95 }
96 96
97 int 97 int
98 nf_ct_get_tuple(const struct sk_buff *skb, 98 nf_ct_get_tuple(const struct sk_buff *skb,
99 unsigned int nhoff, 99 unsigned int nhoff,
100 unsigned int dataoff, 100 unsigned int dataoff,
101 u_int16_t l3num, 101 u_int16_t l3num,
102 u_int8_t protonum, 102 u_int8_t protonum,
103 struct nf_conntrack_tuple *tuple, 103 struct nf_conntrack_tuple *tuple,
104 const struct nf_conntrack_l3proto *l3proto, 104 const struct nf_conntrack_l3proto *l3proto,
105 const struct nf_conntrack_l4proto *l4proto) 105 const struct nf_conntrack_l4proto *l4proto)
106 { 106 {
107 NF_CT_TUPLE_U_BLANK(tuple); 107 NF_CT_TUPLE_U_BLANK(tuple);
108 108
109 tuple->src.l3num = l3num; 109 tuple->src.l3num = l3num;
110 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) 110 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
111 return 0; 111 return 0;
112 112
113 tuple->dst.protonum = protonum; 113 tuple->dst.protonum = protonum;
114 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 114 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
115 115
116 return l4proto->pkt_to_tuple(skb, dataoff, tuple); 116 return l4proto->pkt_to_tuple(skb, dataoff, tuple);
117 } 117 }
118 EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 118 EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
119 119
120 int nf_ct_get_tuplepr(const struct sk_buff *skb, 120 int nf_ct_get_tuplepr(const struct sk_buff *skb,
121 unsigned int nhoff, 121 unsigned int nhoff,
122 u_int16_t l3num, 122 u_int16_t l3num,
123 struct nf_conntrack_tuple *tuple) 123 struct nf_conntrack_tuple *tuple)
124 { 124 {
125 struct nf_conntrack_l3proto *l3proto; 125 struct nf_conntrack_l3proto *l3proto;
126 struct nf_conntrack_l4proto *l4proto; 126 struct nf_conntrack_l4proto *l4proto;
127 unsigned int protoff; 127 unsigned int protoff;
128 u_int8_t protonum; 128 u_int8_t protonum;
129 int ret; 129 int ret;
130 130
131 rcu_read_lock(); 131 rcu_read_lock();
132 132
133 l3proto = __nf_ct_l3proto_find(l3num); 133 l3proto = __nf_ct_l3proto_find(l3num);
134 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); 134 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
135 if (ret != NF_ACCEPT) { 135 if (ret != NF_ACCEPT) {
136 rcu_read_unlock(); 136 rcu_read_unlock();
137 return 0; 137 return 0;
138 } 138 }
139 139
140 l4proto = __nf_ct_l4proto_find(l3num, protonum); 140 l4proto = __nf_ct_l4proto_find(l3num, protonum);
141 141
142 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, 142 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
143 l3proto, l4proto); 143 l3proto, l4proto);
144 144
145 rcu_read_unlock(); 145 rcu_read_unlock();
146 return ret; 146 return ret;
147 } 147 }
148 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); 148 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
149 149
150 int 150 int
151 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 151 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
152 const struct nf_conntrack_tuple *orig, 152 const struct nf_conntrack_tuple *orig,
153 const struct nf_conntrack_l3proto *l3proto, 153 const struct nf_conntrack_l3proto *l3proto,
154 const struct nf_conntrack_l4proto *l4proto) 154 const struct nf_conntrack_l4proto *l4proto)
155 { 155 {
156 NF_CT_TUPLE_U_BLANK(inverse); 156 NF_CT_TUPLE_U_BLANK(inverse);
157 157
158 inverse->src.l3num = orig->src.l3num; 158 inverse->src.l3num = orig->src.l3num;
159 if (l3proto->invert_tuple(inverse, orig) == 0) 159 if (l3proto->invert_tuple(inverse, orig) == 0)
160 return 0; 160 return 0;
161 161
162 inverse->dst.dir = !orig->dst.dir; 162 inverse->dst.dir = !orig->dst.dir;
163 163
164 inverse->dst.protonum = orig->dst.protonum; 164 inverse->dst.protonum = orig->dst.protonum;
165 return l4proto->invert_tuple(inverse, orig); 165 return l4proto->invert_tuple(inverse, orig);
166 } 166 }
167 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); 167 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
168 168
169 static void 169 static void
170 clean_from_lists(struct nf_conn *ct) 170 clean_from_lists(struct nf_conn *ct)
171 { 171 {
172 pr_debug("clean_from_lists(%p)\n", ct); 172 pr_debug("clean_from_lists(%p)\n", ct);
173 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 173 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
174 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); 174 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
175 175
176 /* Destroy all pending expectations */ 176 /* Destroy all pending expectations */
177 nf_ct_remove_expectations(ct); 177 nf_ct_remove_expectations(ct);
178 } 178 }
179 179
180 static void 180 static void
181 destroy_conntrack(struct nf_conntrack *nfct) 181 destroy_conntrack(struct nf_conntrack *nfct)
182 { 182 {
183 struct nf_conn *ct = (struct nf_conn *)nfct; 183 struct nf_conn *ct = (struct nf_conn *)nfct;
184 struct nf_conntrack_l4proto *l4proto; 184 struct nf_conntrack_l4proto *l4proto;
185 185
186 pr_debug("destroy_conntrack(%p)\n", ct); 186 pr_debug("destroy_conntrack(%p)\n", ct);
187 NF_CT_ASSERT(atomic_read(&nfct->use) == 0); 187 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
188 NF_CT_ASSERT(!timer_pending(&ct->timeout)); 188 NF_CT_ASSERT(!timer_pending(&ct->timeout));
189 189
190 nf_conntrack_event(IPCT_DESTROY, ct); 190 nf_conntrack_event(IPCT_DESTROY, ct);
191 set_bit(IPS_DYING_BIT, &ct->status); 191 set_bit(IPS_DYING_BIT, &ct->status);
192 192
193 /* To make sure we don't get any weird locking issues here: 193 /* To make sure we don't get any weird locking issues here:
194 * destroy_conntrack() MUST NOT be called with a write lock 194 * destroy_conntrack() MUST NOT be called with a write lock
195 * to nf_conntrack_lock!!! -HW */ 195 * to nf_conntrack_lock!!! -HW */
196 rcu_read_lock(); 196 rcu_read_lock();
197 l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, 197 l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
198 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); 198 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
199 if (l4proto && l4proto->destroy) 199 if (l4proto && l4proto->destroy)
200 l4proto->destroy(ct); 200 l4proto->destroy(ct);
201 201
202 nf_ct_ext_destroy(ct); 202 nf_ct_ext_destroy(ct);
203 203
204 rcu_read_unlock(); 204 rcu_read_unlock();
205 205
206 spin_lock_bh(&nf_conntrack_lock); 206 spin_lock_bh(&nf_conntrack_lock);
207 /* Expectations will have been removed in clean_from_lists, 207 /* Expectations will have been removed in clean_from_lists,
208 * except TFTP can create an expectation on the first packet, 208 * except TFTP can create an expectation on the first packet,
209 * before connection is in the list, so we need to clean here, 209 * before connection is in the list, so we need to clean here,
210 * too. */ 210 * too. */
211 nf_ct_remove_expectations(ct); 211 nf_ct_remove_expectations(ct);
212 212
213 /* We overload first tuple to link into unconfirmed list. */ 213 /* We overload first tuple to link into unconfirmed list. */
214 if (!nf_ct_is_confirmed(ct)) { 214 if (!nf_ct_is_confirmed(ct)) {
215 BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); 215 BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode));
216 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 216 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
217 } 217 }
218 218
219 NF_CT_STAT_INC(delete); 219 NF_CT_STAT_INC(delete);
220 spin_unlock_bh(&nf_conntrack_lock); 220 spin_unlock_bh(&nf_conntrack_lock);
221 221
222 if (ct->master) 222 if (ct->master)
223 nf_ct_put(ct->master); 223 nf_ct_put(ct->master);
224 224
225 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); 225 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
226 nf_conntrack_free(ct); 226 nf_conntrack_free(ct);
227 } 227 }
228 228
229 static void death_by_timeout(unsigned long ul_conntrack) 229 static void death_by_timeout(unsigned long ul_conntrack)
230 { 230 {
231 struct nf_conn *ct = (void *)ul_conntrack; 231 struct nf_conn *ct = (void *)ul_conntrack;
232 struct nf_conn_help *help = nfct_help(ct); 232 struct nf_conn_help *help = nfct_help(ct);
233 struct nf_conntrack_helper *helper; 233 struct nf_conntrack_helper *helper;
234 234
235 if (help) { 235 if (help) {
236 rcu_read_lock(); 236 rcu_read_lock();
237 helper = rcu_dereference(help->helper); 237 helper = rcu_dereference(help->helper);
238 if (helper && helper->destroy) 238 if (helper && helper->destroy)
239 helper->destroy(ct); 239 helper->destroy(ct);
240 rcu_read_unlock(); 240 rcu_read_unlock();
241 } 241 }
242 242
243 spin_lock_bh(&nf_conntrack_lock); 243 spin_lock_bh(&nf_conntrack_lock);
244 /* Inside lock so preempt is disabled on module removal path. 244 /* Inside lock so preempt is disabled on module removal path.
245 * Otherwise we can get spurious warnings. */ 245 * Otherwise we can get spurious warnings. */
246 NF_CT_STAT_INC(delete_list); 246 NF_CT_STAT_INC(delete_list);
247 clean_from_lists(ct); 247 clean_from_lists(ct);
248 spin_unlock_bh(&nf_conntrack_lock); 248 spin_unlock_bh(&nf_conntrack_lock);
249 nf_ct_put(ct); 249 nf_ct_put(ct);
250 } 250 }
251 251
252 struct nf_conntrack_tuple_hash * 252 struct nf_conntrack_tuple_hash *
253 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple) 253 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple)
254 { 254 {
255 struct nf_conntrack_tuple_hash *h; 255 struct nf_conntrack_tuple_hash *h;
256 struct hlist_node *n; 256 struct hlist_node *n;
257 unsigned int hash = hash_conntrack(tuple); 257 unsigned int hash = hash_conntrack(tuple);
258 258
259 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { 259 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
260 if (nf_ct_tuple_equal(tuple, &h->tuple)) { 260 if (nf_ct_tuple_equal(tuple, &h->tuple)) {
261 NF_CT_STAT_INC(found); 261 NF_CT_STAT_INC(found);
262 return h; 262 return h;
263 } 263 }
264 NF_CT_STAT_INC(searched); 264 NF_CT_STAT_INC(searched);
265 } 265 }
266 266
267 return NULL; 267 return NULL;
268 } 268 }
269 EXPORT_SYMBOL_GPL(__nf_conntrack_find); 269 EXPORT_SYMBOL_GPL(__nf_conntrack_find);
270 270
271 /* Find a connection corresponding to a tuple. */ 271 /* Find a connection corresponding to a tuple. */
272 struct nf_conntrack_tuple_hash * 272 struct nf_conntrack_tuple_hash *
273 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) 273 nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple)
274 { 274 {
275 struct nf_conntrack_tuple_hash *h; 275 struct nf_conntrack_tuple_hash *h;
276 struct nf_conn *ct; 276 struct nf_conn *ct;
277 277
278 rcu_read_lock(); 278 rcu_read_lock();
279 h = __nf_conntrack_find(tuple); 279 h = __nf_conntrack_find(tuple);
280 if (h) { 280 if (h) {
281 ct = nf_ct_tuplehash_to_ctrack(h); 281 ct = nf_ct_tuplehash_to_ctrack(h);
282 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 282 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
283 h = NULL; 283 h = NULL;
284 } 284 }
285 rcu_read_unlock(); 285 rcu_read_unlock();
286 286
287 return h; 287 return h;
288 } 288 }
289 EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 289 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
290 290
291 static void __nf_conntrack_hash_insert(struct nf_conn *ct, 291 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
292 unsigned int hash, 292 unsigned int hash,
293 unsigned int repl_hash) 293 unsigned int repl_hash)
294 { 294 {
295 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 295 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
296 &nf_conntrack_hash[hash]); 296 &nf_conntrack_hash[hash]);
297 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, 297 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
298 &nf_conntrack_hash[repl_hash]); 298 &nf_conntrack_hash[repl_hash]);
299 } 299 }
300 300
301 void nf_conntrack_hash_insert(struct nf_conn *ct) 301 void nf_conntrack_hash_insert(struct nf_conn *ct)
302 { 302 {
303 unsigned int hash, repl_hash; 303 unsigned int hash, repl_hash;
304 304
305 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 305 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
306 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 306 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
307 307
308 spin_lock_bh(&nf_conntrack_lock); 308 spin_lock_bh(&nf_conntrack_lock);
309 __nf_conntrack_hash_insert(ct, hash, repl_hash); 309 __nf_conntrack_hash_insert(ct, hash, repl_hash);
310 spin_unlock_bh(&nf_conntrack_lock); 310 spin_unlock_bh(&nf_conntrack_lock);
311 } 311 }
312 EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); 312 EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
313 313
314 /* Confirm a connection given skb; places it in hash table */ 314 /* Confirm a connection given skb; places it in hash table */
315 int 315 int
316 __nf_conntrack_confirm(struct sk_buff *skb) 316 __nf_conntrack_confirm(struct sk_buff *skb)
317 { 317 {
318 unsigned int hash, repl_hash; 318 unsigned int hash, repl_hash;
319 struct nf_conntrack_tuple_hash *h; 319 struct nf_conntrack_tuple_hash *h;
320 struct nf_conn *ct; 320 struct nf_conn *ct;
321 struct nf_conn_help *help; 321 struct nf_conn_help *help;
322 struct hlist_node *n; 322 struct hlist_node *n;
323 enum ip_conntrack_info ctinfo; 323 enum ip_conntrack_info ctinfo;
324 324
325 ct = nf_ct_get(skb, &ctinfo); 325 ct = nf_ct_get(skb, &ctinfo);
326 326
327 /* ipt_REJECT uses nf_conntrack_attach to attach related 327 /* ipt_REJECT uses nf_conntrack_attach to attach related
328 ICMP/TCP RST packets in other direction. Actual packet 328 ICMP/TCP RST packets in other direction. Actual packet
329 which created connection will be IP_CT_NEW or for an 329 which created connection will be IP_CT_NEW or for an
330 expected connection, IP_CT_RELATED. */ 330 expected connection, IP_CT_RELATED. */
331 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 331 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
332 return NF_ACCEPT; 332 return NF_ACCEPT;
333 333
334 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 334 hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
335 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 335 repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
336 336
337 /* We're not in hash table, and we refuse to set up related 337 /* We're not in hash table, and we refuse to set up related
338 connections for unconfirmed conns. But packet copies and 338 connections for unconfirmed conns. But packet copies and
339 REJECT will give spurious warnings here. */ 339 REJECT will give spurious warnings here. */
340 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ 340 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
341 341
342 /* No external references means noone else could have 342 /* No external references means noone else could have
343 confirmed us. */ 343 confirmed us. */
344 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 344 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
345 pr_debug("Confirming conntrack %p\n", ct); 345 pr_debug("Confirming conntrack %p\n", ct);
346 346
347 spin_lock_bh(&nf_conntrack_lock); 347 spin_lock_bh(&nf_conntrack_lock);
348 348
349 /* See if there's one in the list already, including reverse: 349 /* See if there's one in the list already, including reverse:
350 NAT could have grabbed it without realizing, since we're 350 NAT could have grabbed it without realizing, since we're
351 not in the hash. If there is, we lost race. */ 351 not in the hash. If there is, we lost race. */
352 hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) 352 hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode)
353 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 353 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
354 &h->tuple)) 354 &h->tuple))
355 goto out; 355 goto out;
356 hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) 356 hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode)
357 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 357 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
358 &h->tuple)) 358 &h->tuple))
359 goto out; 359 goto out;
360 360
361 /* Remove from unconfirmed list */ 361 /* Remove from unconfirmed list */
362 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 362 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
363 363
364 __nf_conntrack_hash_insert(ct, hash, repl_hash); 364 __nf_conntrack_hash_insert(ct, hash, repl_hash);
365 /* Timer relative to confirmation time, not original 365 /* Timer relative to confirmation time, not original
366 setting time, otherwise we'd get timer wrap in 366 setting time, otherwise we'd get timer wrap in
367 weird delay cases. */ 367 weird delay cases. */
368 ct->timeout.expires += jiffies; 368 ct->timeout.expires += jiffies;
369 add_timer(&ct->timeout); 369 add_timer(&ct->timeout);
370 atomic_inc(&ct->ct_general.use); 370 atomic_inc(&ct->ct_general.use);
371 set_bit(IPS_CONFIRMED_BIT, &ct->status); 371 set_bit(IPS_CONFIRMED_BIT, &ct->status);
372 NF_CT_STAT_INC(insert); 372 NF_CT_STAT_INC(insert);
373 spin_unlock_bh(&nf_conntrack_lock); 373 spin_unlock_bh(&nf_conntrack_lock);
374 help = nfct_help(ct); 374 help = nfct_help(ct);
375 if (help && help->helper) 375 if (help && help->helper)
376 nf_conntrack_event_cache(IPCT_HELPER, skb); 376 nf_conntrack_event_cache(IPCT_HELPER, skb);
377 #ifdef CONFIG_NF_NAT_NEEDED 377 #ifdef CONFIG_NF_NAT_NEEDED
378 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || 378 if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
379 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) 379 test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
380 nf_conntrack_event_cache(IPCT_NATINFO, skb); 380 nf_conntrack_event_cache(IPCT_NATINFO, skb);
381 #endif 381 #endif
382 nf_conntrack_event_cache(master_ct(ct) ? 382 nf_conntrack_event_cache(master_ct(ct) ?
383 IPCT_RELATED : IPCT_NEW, skb); 383 IPCT_RELATED : IPCT_NEW, skb);
384 return NF_ACCEPT; 384 return NF_ACCEPT;
385 385
386 out: 386 out:
387 NF_CT_STAT_INC(insert_failed); 387 NF_CT_STAT_INC(insert_failed);
388 spin_unlock_bh(&nf_conntrack_lock); 388 spin_unlock_bh(&nf_conntrack_lock);
389 return NF_DROP; 389 return NF_DROP;
390 } 390 }
391 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); 391 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
392 392
393 /* Returns true if a connection correspondings to the tuple (required 393 /* Returns true if a connection correspondings to the tuple (required
394 for NAT). */ 394 for NAT). */
395 int 395 int
396 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, 396 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
397 const struct nf_conn *ignored_conntrack) 397 const struct nf_conn *ignored_conntrack)
398 { 398 {
399 struct nf_conntrack_tuple_hash *h; 399 struct nf_conntrack_tuple_hash *h;
400 struct hlist_node *n; 400 struct hlist_node *n;
401 unsigned int hash = hash_conntrack(tuple); 401 unsigned int hash = hash_conntrack(tuple);
402 402
403 rcu_read_lock(); 403 rcu_read_lock();
404 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { 404 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) {
405 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && 405 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
406 nf_ct_tuple_equal(tuple, &h->tuple)) { 406 nf_ct_tuple_equal(tuple, &h->tuple)) {
407 NF_CT_STAT_INC(found); 407 NF_CT_STAT_INC(found);
408 rcu_read_unlock(); 408 rcu_read_unlock();
409 return 1; 409 return 1;
410 } 410 }
411 NF_CT_STAT_INC(searched); 411 NF_CT_STAT_INC(searched);
412 } 412 }
413 rcu_read_unlock(); 413 rcu_read_unlock();
414 414
415 return 0; 415 return 0;
416 } 416 }
417 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); 417 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
418 418
419 #define NF_CT_EVICTION_RANGE 8 419 #define NF_CT_EVICTION_RANGE 8
420 420
421 /* There's a small race here where we may free a just-assured 421 /* There's a small race here where we may free a just-assured
422 connection. Too bad: we're in trouble anyway. */ 422 connection. Too bad: we're in trouble anyway. */
423 static int early_drop(unsigned int hash) 423 static noinline int early_drop(unsigned int hash)
424 { 424 {
425 /* Use oldest entry, which is roughly LRU */ 425 /* Use oldest entry, which is roughly LRU */
426 struct nf_conntrack_tuple_hash *h; 426 struct nf_conntrack_tuple_hash *h;
427 struct nf_conn *ct = NULL, *tmp; 427 struct nf_conn *ct = NULL, *tmp;
428 struct hlist_node *n; 428 struct hlist_node *n;
429 unsigned int i, cnt = 0; 429 unsigned int i, cnt = 0;
430 int dropped = 0; 430 int dropped = 0;
431 431
432 rcu_read_lock(); 432 rcu_read_lock();
433 for (i = 0; i < nf_conntrack_htable_size; i++) { 433 for (i = 0; i < nf_conntrack_htable_size; i++) {
434 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], 434 hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
435 hnode) { 435 hnode) {
436 tmp = nf_ct_tuplehash_to_ctrack(h); 436 tmp = nf_ct_tuplehash_to_ctrack(h);
437 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) 437 if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
438 ct = tmp; 438 ct = tmp;
439 cnt++; 439 cnt++;
440 } 440 }
441 441
442 if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 442 if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
443 ct = NULL; 443 ct = NULL;
444 if (ct || cnt >= NF_CT_EVICTION_RANGE) 444 if (ct || cnt >= NF_CT_EVICTION_RANGE)
445 break; 445 break;
446 hash = (hash + 1) % nf_conntrack_htable_size; 446 hash = (hash + 1) % nf_conntrack_htable_size;
447 } 447 }
448 rcu_read_unlock(); 448 rcu_read_unlock();
449 449
450 if (!ct) 450 if (!ct)
451 return dropped; 451 return dropped;
452 452
453 if (del_timer(&ct->timeout)) { 453 if (del_timer(&ct->timeout)) {
454 death_by_timeout((unsigned long)ct); 454 death_by_timeout((unsigned long)ct);
455 dropped = 1; 455 dropped = 1;
456 NF_CT_STAT_INC_ATOMIC(early_drop); 456 NF_CT_STAT_INC_ATOMIC(early_drop);
457 } 457 }
458 nf_ct_put(ct); 458 nf_ct_put(ct);
459 return dropped; 459 return dropped;
460 } 460 }
461 461
462 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, 462 struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
463 const struct nf_conntrack_tuple *repl) 463 const struct nf_conntrack_tuple *repl)
464 { 464 {
465 struct nf_conn *conntrack = NULL; 465 struct nf_conn *conntrack = NULL;
466 466
467 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 467 if (unlikely(!nf_conntrack_hash_rnd_initted)) {
468 get_random_bytes(&nf_conntrack_hash_rnd, 4); 468 get_random_bytes(&nf_conntrack_hash_rnd, 4);
469 nf_conntrack_hash_rnd_initted = 1; 469 nf_conntrack_hash_rnd_initted = 1;
470 } 470 }
471 471
472 /* We don't want any race condition at early drop stage */ 472 /* We don't want any race condition at early drop stage */
473 atomic_inc(&nf_conntrack_count); 473 atomic_inc(&nf_conntrack_count);
474 474
475 if (nf_conntrack_max 475 if (nf_conntrack_max &&
476 && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { 476 unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) {
477 unsigned int hash = hash_conntrack(orig); 477 unsigned int hash = hash_conntrack(orig);
478 if (!early_drop(hash)) { 478 if (!early_drop(hash)) {
479 atomic_dec(&nf_conntrack_count); 479 atomic_dec(&nf_conntrack_count);
480 if (net_ratelimit()) 480 if (net_ratelimit())
481 printk(KERN_WARNING 481 printk(KERN_WARNING
482 "nf_conntrack: table full, dropping" 482 "nf_conntrack: table full, dropping"
483 " packet.\n"); 483 " packet.\n");
484 return ERR_PTR(-ENOMEM); 484 return ERR_PTR(-ENOMEM);
485 } 485 }
486 } 486 }
487 487
488 conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); 488 conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC);
489 if (conntrack == NULL) { 489 if (conntrack == NULL) {
490 pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); 490 pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
491 atomic_dec(&nf_conntrack_count); 491 atomic_dec(&nf_conntrack_count);
492 return ERR_PTR(-ENOMEM); 492 return ERR_PTR(-ENOMEM);
493 } 493 }
494 494
495 atomic_set(&conntrack->ct_general.use, 1); 495 atomic_set(&conntrack->ct_general.use, 1);
496 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 496 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
497 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 497 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
498 /* Don't set timer yet: wait for confirmation */ 498 /* Don't set timer yet: wait for confirmation */
499 setup_timer(&conntrack->timeout, death_by_timeout, 499 setup_timer(&conntrack->timeout, death_by_timeout,
500 (unsigned long)conntrack); 500 (unsigned long)conntrack);
501 INIT_RCU_HEAD(&conntrack->rcu); 501 INIT_RCU_HEAD(&conntrack->rcu);
502 502
503 return conntrack; 503 return conntrack;
504 } 504 }
505 EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 505 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
506 506
507 static void nf_conntrack_free_rcu(struct rcu_head *head) 507 static void nf_conntrack_free_rcu(struct rcu_head *head)
508 { 508 {
509 struct nf_conn *ct = container_of(head, struct nf_conn, rcu); 509 struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
510 510
511 nf_ct_ext_free(ct); 511 nf_ct_ext_free(ct);
512 kmem_cache_free(nf_conntrack_cachep, ct); 512 kmem_cache_free(nf_conntrack_cachep, ct);
513 atomic_dec(&nf_conntrack_count); 513 atomic_dec(&nf_conntrack_count);
514 } 514 }
515 515
516 void nf_conntrack_free(struct nf_conn *conntrack) 516 void nf_conntrack_free(struct nf_conn *conntrack)
517 { 517 {
518 call_rcu(&conntrack->rcu, nf_conntrack_free_rcu); 518 call_rcu(&conntrack->rcu, nf_conntrack_free_rcu);
519 } 519 }
520 EXPORT_SYMBOL_GPL(nf_conntrack_free); 520 EXPORT_SYMBOL_GPL(nf_conntrack_free);
521 521
522 /* Allocate a new conntrack: we return -ENOMEM if classification 522 /* Allocate a new conntrack: we return -ENOMEM if classification
523 failed due to stress. Otherwise it really is unclassifiable. */ 523 failed due to stress. Otherwise it really is unclassifiable. */
524 static struct nf_conntrack_tuple_hash * 524 static struct nf_conntrack_tuple_hash *
525 init_conntrack(const struct nf_conntrack_tuple *tuple, 525 init_conntrack(const struct nf_conntrack_tuple *tuple,
526 struct nf_conntrack_l3proto *l3proto, 526 struct nf_conntrack_l3proto *l3proto,
527 struct nf_conntrack_l4proto *l4proto, 527 struct nf_conntrack_l4proto *l4proto,
528 struct sk_buff *skb, 528 struct sk_buff *skb,
529 unsigned int dataoff) 529 unsigned int dataoff)
530 { 530 {
531 struct nf_conn *conntrack; 531 struct nf_conn *conntrack;
532 struct nf_conn_help *help; 532 struct nf_conn_help *help;
533 struct nf_conntrack_tuple repl_tuple; 533 struct nf_conntrack_tuple repl_tuple;
534 struct nf_conntrack_expect *exp; 534 struct nf_conntrack_expect *exp;
535 535
536 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { 536 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
537 pr_debug("Can't invert tuple.\n"); 537 pr_debug("Can't invert tuple.\n");
538 return NULL; 538 return NULL;
539 } 539 }
540 540
541 conntrack = nf_conntrack_alloc(tuple, &repl_tuple); 541 conntrack = nf_conntrack_alloc(tuple, &repl_tuple);
542 if (conntrack == NULL || IS_ERR(conntrack)) { 542 if (conntrack == NULL || IS_ERR(conntrack)) {
543 pr_debug("Can't allocate conntrack.\n"); 543 pr_debug("Can't allocate conntrack.\n");
544 return (struct nf_conntrack_tuple_hash *)conntrack; 544 return (struct nf_conntrack_tuple_hash *)conntrack;
545 } 545 }
546 546
547 if (!l4proto->new(conntrack, skb, dataoff)) { 547 if (!l4proto->new(conntrack, skb, dataoff)) {
548 nf_conntrack_free(conntrack); 548 nf_conntrack_free(conntrack);
549 pr_debug("init conntrack: can't track with proto module\n"); 549 pr_debug("init conntrack: can't track with proto module\n");
550 return NULL; 550 return NULL;
551 } 551 }
552 552
553 spin_lock_bh(&nf_conntrack_lock); 553 spin_lock_bh(&nf_conntrack_lock);
554 exp = nf_ct_find_expectation(tuple); 554 exp = nf_ct_find_expectation(tuple);
555 if (exp) { 555 if (exp) {
556 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", 556 pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
557 conntrack, exp); 557 conntrack, exp);
558 /* Welcome, Mr. Bond. We've been expecting you... */ 558 /* Welcome, Mr. Bond. We've been expecting you... */
559 __set_bit(IPS_EXPECTED_BIT, &conntrack->status); 559 __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
560 conntrack->master = exp->master; 560 conntrack->master = exp->master;
561 if (exp->helper) { 561 if (exp->helper) {
562 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); 562 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC);
563 if (help) 563 if (help)
564 rcu_assign_pointer(help->helper, exp->helper); 564 rcu_assign_pointer(help->helper, exp->helper);
565 } 565 }
566 566
567 #ifdef CONFIG_NF_CONNTRACK_MARK 567 #ifdef CONFIG_NF_CONNTRACK_MARK
568 conntrack->mark = exp->master->mark; 568 conntrack->mark = exp->master->mark;
569 #endif 569 #endif
570 #ifdef CONFIG_NF_CONNTRACK_SECMARK 570 #ifdef CONFIG_NF_CONNTRACK_SECMARK
571 conntrack->secmark = exp->master->secmark; 571 conntrack->secmark = exp->master->secmark;
572 #endif 572 #endif
573 nf_conntrack_get(&conntrack->master->ct_general); 573 nf_conntrack_get(&conntrack->master->ct_general);
574 NF_CT_STAT_INC(expect_new); 574 NF_CT_STAT_INC(expect_new);
575 } else { 575 } else {
576 struct nf_conntrack_helper *helper; 576 struct nf_conntrack_helper *helper;
577 577
578 helper = __nf_ct_helper_find(&repl_tuple); 578 helper = __nf_ct_helper_find(&repl_tuple);
579 if (helper) { 579 if (helper) {
580 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); 580 help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC);
581 if (help) 581 if (help)
582 rcu_assign_pointer(help->helper, helper); 582 rcu_assign_pointer(help->helper, helper);
583 } 583 }
584 NF_CT_STAT_INC(new); 584 NF_CT_STAT_INC(new);
585 } 585 }
586 586
587 /* Overload tuple linked list to put us in unconfirmed list. */ 587 /* Overload tuple linked list to put us in unconfirmed list. */
588 hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 588 hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
589 &unconfirmed); 589 &unconfirmed);
590 590
591 spin_unlock_bh(&nf_conntrack_lock); 591 spin_unlock_bh(&nf_conntrack_lock);
592 592
593 if (exp) { 593 if (exp) {
594 if (exp->expectfn) 594 if (exp->expectfn)
595 exp->expectfn(conntrack, exp); 595 exp->expectfn(conntrack, exp);
596 nf_ct_expect_put(exp); 596 nf_ct_expect_put(exp);
597 } 597 }
598 598
599 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; 599 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
600 } 600 }
601 601
602 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ 602 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
603 static inline struct nf_conn * 603 static inline struct nf_conn *
604 resolve_normal_ct(struct sk_buff *skb, 604 resolve_normal_ct(struct sk_buff *skb,
605 unsigned int dataoff, 605 unsigned int dataoff,
606 u_int16_t l3num, 606 u_int16_t l3num,
607 u_int8_t protonum, 607 u_int8_t protonum,
608 struct nf_conntrack_l3proto *l3proto, 608 struct nf_conntrack_l3proto *l3proto,
609 struct nf_conntrack_l4proto *l4proto, 609 struct nf_conntrack_l4proto *l4proto,
610 int *set_reply, 610 int *set_reply,
611 enum ip_conntrack_info *ctinfo) 611 enum ip_conntrack_info *ctinfo)
612 { 612 {
613 struct nf_conntrack_tuple tuple; 613 struct nf_conntrack_tuple tuple;
614 struct nf_conntrack_tuple_hash *h; 614 struct nf_conntrack_tuple_hash *h;
615 struct nf_conn *ct; 615 struct nf_conn *ct;
616 616
617 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 617 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
618 dataoff, l3num, protonum, &tuple, l3proto, 618 dataoff, l3num, protonum, &tuple, l3proto,
619 l4proto)) { 619 l4proto)) {
620 pr_debug("resolve_normal_ct: Can't get tuple\n"); 620 pr_debug("resolve_normal_ct: Can't get tuple\n");
621 return NULL; 621 return NULL;
622 } 622 }
623 623
624 /* look for tuple match */ 624 /* look for tuple match */
625 h = nf_conntrack_find_get(&tuple); 625 h = nf_conntrack_find_get(&tuple);
626 if (!h) { 626 if (!h) {
627 h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); 627 h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff);
628 if (!h) 628 if (!h)
629 return NULL; 629 return NULL;
630 if (IS_ERR(h)) 630 if (IS_ERR(h))
631 return (void *)h; 631 return (void *)h;
632 } 632 }
633 ct = nf_ct_tuplehash_to_ctrack(h); 633 ct = nf_ct_tuplehash_to_ctrack(h);
634 634
635 /* It exists; we have (non-exclusive) reference. */ 635 /* It exists; we have (non-exclusive) reference. */
636 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 636 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
637 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; 637 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
638 /* Please set reply bit if this packet OK */ 638 /* Please set reply bit if this packet OK */
639 *set_reply = 1; 639 *set_reply = 1;
640 } else { 640 } else {
641 /* Once we've had two way comms, always ESTABLISHED. */ 641 /* Once we've had two way comms, always ESTABLISHED. */
642 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 642 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
643 pr_debug("nf_conntrack_in: normal packet for %p\n", ct); 643 pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
644 *ctinfo = IP_CT_ESTABLISHED; 644 *ctinfo = IP_CT_ESTABLISHED;
645 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { 645 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
646 pr_debug("nf_conntrack_in: related packet for %p\n", 646 pr_debug("nf_conntrack_in: related packet for %p\n",
647 ct); 647 ct);
648 *ctinfo = IP_CT_RELATED; 648 *ctinfo = IP_CT_RELATED;
649 } else { 649 } else {
650 pr_debug("nf_conntrack_in: new packet for %p\n", ct); 650 pr_debug("nf_conntrack_in: new packet for %p\n", ct);
651 *ctinfo = IP_CT_NEW; 651 *ctinfo = IP_CT_NEW;
652 } 652 }
653 *set_reply = 0; 653 *set_reply = 0;
654 } 654 }
655 skb->nfct = &ct->ct_general; 655 skb->nfct = &ct->ct_general;
656 skb->nfctinfo = *ctinfo; 656 skb->nfctinfo = *ctinfo;
657 return ct; 657 return ct;
658 } 658 }
659 659
660 unsigned int 660 unsigned int
661 nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) 661 nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
662 { 662 {
663 struct nf_conn *ct; 663 struct nf_conn *ct;
664 enum ip_conntrack_info ctinfo; 664 enum ip_conntrack_info ctinfo;
665 struct nf_conntrack_l3proto *l3proto; 665 struct nf_conntrack_l3proto *l3proto;
666 struct nf_conntrack_l4proto *l4proto; 666 struct nf_conntrack_l4proto *l4proto;
667 unsigned int dataoff; 667 unsigned int dataoff;
668 u_int8_t protonum; 668 u_int8_t protonum;
669 int set_reply = 0; 669 int set_reply = 0;
670 int ret; 670 int ret;
671 671
672 /* Previously seen (loopback or untracked)? Ignore. */ 672 /* Previously seen (loopback or untracked)? Ignore. */
673 if (skb->nfct) { 673 if (skb->nfct) {
674 NF_CT_STAT_INC_ATOMIC(ignore); 674 NF_CT_STAT_INC_ATOMIC(ignore);
675 return NF_ACCEPT; 675 return NF_ACCEPT;
676 } 676 }
677 677
678 /* rcu_read_lock()ed by nf_hook_slow */ 678 /* rcu_read_lock()ed by nf_hook_slow */
679 l3proto = __nf_ct_l3proto_find((u_int16_t)pf); 679 l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
680 ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 680 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
681 &dataoff, &protonum); 681 &dataoff, &protonum);
682 if (ret <= 0) { 682 if (ret <= 0) {
683 pr_debug("not prepared to track yet or error occured\n"); 683 pr_debug("not prepared to track yet or error occured\n");
684 NF_CT_STAT_INC_ATOMIC(error); 684 NF_CT_STAT_INC_ATOMIC(error);
685 NF_CT_STAT_INC_ATOMIC(invalid); 685 NF_CT_STAT_INC_ATOMIC(invalid);
686 return -ret; 686 return -ret;
687 } 687 }
688 688
689 l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum); 689 l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum);
690 690
691 /* It may be an special packet, error, unclean... 691 /* It may be an special packet, error, unclean...
692 * inverse of the return code tells to the netfilter 692 * inverse of the return code tells to the netfilter
693 * core what to do with the packet. */ 693 * core what to do with the packet. */
694 if (l4proto->error != NULL && 694 if (l4proto->error != NULL &&
695 (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { 695 (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
696 NF_CT_STAT_INC_ATOMIC(error); 696 NF_CT_STAT_INC_ATOMIC(error);
697 NF_CT_STAT_INC_ATOMIC(invalid); 697 NF_CT_STAT_INC_ATOMIC(invalid);
698 return -ret; 698 return -ret;
699 } 699 }
700 700
701 ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, 701 ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
702 &set_reply, &ctinfo); 702 &set_reply, &ctinfo);
703 if (!ct) { 703 if (!ct) {
704 /* Not valid part of a connection */ 704 /* Not valid part of a connection */
705 NF_CT_STAT_INC_ATOMIC(invalid); 705 NF_CT_STAT_INC_ATOMIC(invalid);
706 return NF_ACCEPT; 706 return NF_ACCEPT;
707 } 707 }
708 708
709 if (IS_ERR(ct)) { 709 if (IS_ERR(ct)) {
710 /* Too stressed to deal. */ 710 /* Too stressed to deal. */
711 NF_CT_STAT_INC_ATOMIC(drop); 711 NF_CT_STAT_INC_ATOMIC(drop);
712 return NF_DROP; 712 return NF_DROP;
713 } 713 }
714 714
715 NF_CT_ASSERT(skb->nfct); 715 NF_CT_ASSERT(skb->nfct);
716 716
717 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); 717 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
718 if (ret < 0) { 718 if (ret < 0) {
719 /* Invalid: inverse of the return code tells 719 /* Invalid: inverse of the return code tells
720 * the netfilter core what to do */ 720 * the netfilter core what to do */
721 pr_debug("nf_conntrack_in: Can't track with proto module\n"); 721 pr_debug("nf_conntrack_in: Can't track with proto module\n");
722 nf_conntrack_put(skb->nfct); 722 nf_conntrack_put(skb->nfct);
723 skb->nfct = NULL; 723 skb->nfct = NULL;
724 NF_CT_STAT_INC_ATOMIC(invalid); 724 NF_CT_STAT_INC_ATOMIC(invalid);
725 return -ret; 725 return -ret;
726 } 726 }
727 727
728 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 728 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
729 nf_conntrack_event_cache(IPCT_STATUS, skb); 729 nf_conntrack_event_cache(IPCT_STATUS, skb);
730 730
731 return ret; 731 return ret;
732 } 732 }
733 EXPORT_SYMBOL_GPL(nf_conntrack_in); 733 EXPORT_SYMBOL_GPL(nf_conntrack_in);
734 734
735 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, 735 int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
736 const struct nf_conntrack_tuple *orig) 736 const struct nf_conntrack_tuple *orig)
737 { 737 {
738 int ret; 738 int ret;
739 739
740 rcu_read_lock(); 740 rcu_read_lock();
741 ret = nf_ct_invert_tuple(inverse, orig, 741 ret = nf_ct_invert_tuple(inverse, orig,
742 __nf_ct_l3proto_find(orig->src.l3num), 742 __nf_ct_l3proto_find(orig->src.l3num),
743 __nf_ct_l4proto_find(orig->src.l3num, 743 __nf_ct_l4proto_find(orig->src.l3num,
744 orig->dst.protonum)); 744 orig->dst.protonum));
745 rcu_read_unlock(); 745 rcu_read_unlock();
746 return ret; 746 return ret;
747 } 747 }
748 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); 748 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
749 749
750 /* Alter reply tuple (maybe alter helper). This is for NAT, and is 750 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
751 implicitly racy: see __nf_conntrack_confirm */ 751 implicitly racy: see __nf_conntrack_confirm */
752 void nf_conntrack_alter_reply(struct nf_conn *ct, 752 void nf_conntrack_alter_reply(struct nf_conn *ct,
753 const struct nf_conntrack_tuple *newreply) 753 const struct nf_conntrack_tuple *newreply)
754 { 754 {
755 struct nf_conn_help *help = nfct_help(ct); 755 struct nf_conn_help *help = nfct_help(ct);
756 struct nf_conntrack_helper *helper; 756 struct nf_conntrack_helper *helper;
757 757
758 /* Should be unconfirmed, so not in hash table yet */ 758 /* Should be unconfirmed, so not in hash table yet */
759 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 759 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
760 760
761 pr_debug("Altering reply tuple of %p to ", ct); 761 pr_debug("Altering reply tuple of %p to ", ct);
762 NF_CT_DUMP_TUPLE(newreply); 762 NF_CT_DUMP_TUPLE(newreply);
763 763
764 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 764 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
765 if (ct->master || (help && help->expecting != 0)) 765 if (ct->master || (help && help->expecting != 0))
766 return; 766 return;
767 767
768 rcu_read_lock(); 768 rcu_read_lock();
769 helper = __nf_ct_helper_find(newreply); 769 helper = __nf_ct_helper_find(newreply);
770 if (helper == NULL) { 770 if (helper == NULL) {
771 if (help) 771 if (help)
772 rcu_assign_pointer(help->helper, NULL); 772 rcu_assign_pointer(help->helper, NULL);
773 goto out; 773 goto out;
774 } 774 }
775 775
776 if (help == NULL) { 776 if (help == NULL) {
777 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 777 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
778 if (help == NULL) 778 if (help == NULL)
779 goto out; 779 goto out;
780 } else { 780 } else {
781 memset(&help->help, 0, sizeof(help->help)); 781 memset(&help->help, 0, sizeof(help->help));
782 } 782 }
783 783
784 rcu_assign_pointer(help->helper, helper); 784 rcu_assign_pointer(help->helper, helper);
785 out: 785 out:
786 rcu_read_unlock(); 786 rcu_read_unlock();
787 } 787 }
788 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); 788 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
789 789
790 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ 790 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
791 void __nf_ct_refresh_acct(struct nf_conn *ct, 791 void __nf_ct_refresh_acct(struct nf_conn *ct,
792 enum ip_conntrack_info ctinfo, 792 enum ip_conntrack_info ctinfo,
793 const struct sk_buff *skb, 793 const struct sk_buff *skb,
794 unsigned long extra_jiffies, 794 unsigned long extra_jiffies,
795 int do_acct) 795 int do_acct)
796 { 796 {
797 int event = 0; 797 int event = 0;
798 798
799 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); 799 NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
800 NF_CT_ASSERT(skb); 800 NF_CT_ASSERT(skb);
801 801
802 spin_lock_bh(&nf_conntrack_lock); 802 spin_lock_bh(&nf_conntrack_lock);
803 803
804 /* Only update if this is not a fixed timeout */ 804 /* Only update if this is not a fixed timeout */
805 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) 805 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
806 goto acct; 806 goto acct;
807 807
808 /* If not in hash table, timer will not be active yet */ 808 /* If not in hash table, timer will not be active yet */
809 if (!nf_ct_is_confirmed(ct)) { 809 if (!nf_ct_is_confirmed(ct)) {
810 ct->timeout.expires = extra_jiffies; 810 ct->timeout.expires = extra_jiffies;
811 event = IPCT_REFRESH; 811 event = IPCT_REFRESH;
812 } else { 812 } else {
813 unsigned long newtime = jiffies + extra_jiffies; 813 unsigned long newtime = jiffies + extra_jiffies;
814 814
815 /* Only update the timeout if the new timeout is at least 815 /* Only update the timeout if the new timeout is at least
816 HZ jiffies from the old timeout. Need del_timer for race 816 HZ jiffies from the old timeout. Need del_timer for race
817 avoidance (may already be dying). */ 817 avoidance (may already be dying). */
818 if (newtime - ct->timeout.expires >= HZ 818 if (newtime - ct->timeout.expires >= HZ
819 && del_timer(&ct->timeout)) { 819 && del_timer(&ct->timeout)) {
820 ct->timeout.expires = newtime; 820 ct->timeout.expires = newtime;
821 add_timer(&ct->timeout); 821 add_timer(&ct->timeout);
822 event = IPCT_REFRESH; 822 event = IPCT_REFRESH;
823 } 823 }
824 } 824 }
825 825
826 acct: 826 acct:
827 #ifdef CONFIG_NF_CT_ACCT 827 #ifdef CONFIG_NF_CT_ACCT
828 if (do_acct) { 828 if (do_acct) {
829 ct->counters[CTINFO2DIR(ctinfo)].packets++; 829 ct->counters[CTINFO2DIR(ctinfo)].packets++;
830 ct->counters[CTINFO2DIR(ctinfo)].bytes += 830 ct->counters[CTINFO2DIR(ctinfo)].bytes +=
831 skb->len - skb_network_offset(skb); 831 skb->len - skb_network_offset(skb);
832 832
833 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) 833 if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
834 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) 834 || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
835 event |= IPCT_COUNTER_FILLING; 835 event |= IPCT_COUNTER_FILLING;
836 } 836 }
837 #endif 837 #endif
838 838
839 spin_unlock_bh(&nf_conntrack_lock); 839 spin_unlock_bh(&nf_conntrack_lock);
840 840
841 /* must be unlocked when calling event cache */ 841 /* must be unlocked when calling event cache */
842 if (event) 842 if (event)
843 nf_conntrack_event_cache(event, skb); 843 nf_conntrack_event_cache(event, skb);
844 } 844 }
845 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 845 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
846 846
847 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 847 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
848 848
849 #include <linux/netfilter/nfnetlink.h> 849 #include <linux/netfilter/nfnetlink.h>
850 #include <linux/netfilter/nfnetlink_conntrack.h> 850 #include <linux/netfilter/nfnetlink_conntrack.h>
851 #include <linux/mutex.h> 851 #include <linux/mutex.h>
852 852
853 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be 853 /* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
854 * in ip_conntrack_core, since we don't want the protocols to autoload 854 * in ip_conntrack_core, since we don't want the protocols to autoload
855 * or depend on ctnetlink */ 855 * or depend on ctnetlink */
856 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 856 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
857 const struct nf_conntrack_tuple *tuple) 857 const struct nf_conntrack_tuple *tuple)
858 { 858 {
859 NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port); 859 NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
860 NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port); 860 NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
861 return 0; 861 return 0;
862 862
863 nla_put_failure: 863 nla_put_failure:
864 return -1; 864 return -1;
865 } 865 }
866 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); 866 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
867 867
868 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { 868 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
869 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, 869 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
870 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, 870 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
871 }; 871 };
872 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); 872 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
873 873
874 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], 874 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
875 struct nf_conntrack_tuple *t) 875 struct nf_conntrack_tuple *t)
876 { 876 {
877 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 877 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
878 return -EINVAL; 878 return -EINVAL;
879 879
880 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); 880 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
881 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); 881 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
882 882
883 return 0; 883 return 0;
884 } 884 }
885 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 885 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
886 #endif 886 #endif
887 887
888 /* Used by ipt_REJECT and ip6t_REJECT. */ 888 /* Used by ipt_REJECT and ip6t_REJECT. */
889 static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) 889 static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
890 { 890 {
891 struct nf_conn *ct; 891 struct nf_conn *ct;
892 enum ip_conntrack_info ctinfo; 892 enum ip_conntrack_info ctinfo;
893 893
894 /* This ICMP is in reverse direction to the packet which caused it */ 894 /* This ICMP is in reverse direction to the packet which caused it */
895 ct = nf_ct_get(skb, &ctinfo); 895 ct = nf_ct_get(skb, &ctinfo);
896 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 896 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
897 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; 897 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
898 else 898 else
899 ctinfo = IP_CT_RELATED; 899 ctinfo = IP_CT_RELATED;
900 900
901 /* Attach to new skbuff, and increment count */ 901 /* Attach to new skbuff, and increment count */
902 nskb->nfct = &ct->ct_general; 902 nskb->nfct = &ct->ct_general;
903 nskb->nfctinfo = ctinfo; 903 nskb->nfctinfo = ctinfo;
904 nf_conntrack_get(nskb->nfct); 904 nf_conntrack_get(nskb->nfct);
905 } 905 }
906 906
static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	return iter(nf_ct_tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_node *n;

	spin_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) {
			ct = nf_ct_tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	hlist_for_each_entry(h, n, &unconfirmed, hnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (iter(ct, data))
			set_bit(IPS_DYING_BIT, &ct->status);
	}
	spin_unlock_bh(&nf_conntrack_lock);
	return NULL;
found:
	atomic_inc(&ct->ct_general.use);
	spin_unlock_bh(&nf_conntrack_lock);
	return ct;
}

void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_conn *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);

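A minimal usage sketch: the iter callback returns non-zero for entries that should be killed, and nf_ct_iterate_cleanup() walks both the hash table and the unconfirmed list. The protocol-based filter below and its example_* names are illustrative assumptions; kill_all() just below is the real in-tree example.

/* Hypothetical selective cleanup: kill every conntrack entry whose
 * original-direction tuple uses the given L4 protocol number. */
static int example_kill_l4proto(struct nf_conn *i, void *data)
{
	u_int8_t protonum = *(u_int8_t *)data;

	return i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == protonum;
}

static void example_flush_udp(void)
{
	u_int8_t proto = IPPROTO_UDP;

	nf_ct_iterate_cleanup(example_kill_l4proto, &proto);
}
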
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct hlist_head) * size));
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);

void nf_conntrack_flush(void)
{
	nf_ct_iterate_cleanup(kill_all, NULL);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
	rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   the netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_ct_event_cache_flush();
 i_see_dead_people:
	nf_conntrack_flush();
	if (atomic_read(&nf_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	rcu_assign_pointer(nf_ct_destroy, NULL);

	kmem_cache_destroy(nf_conntrack_cachep);
	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
			     nf_conntrack_htable_size);

	nf_conntrack_proto_fini();
	nf_conntrack_helper_fini();
	nf_conntrack_expect_fini();
}

struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
{
	struct hlist_head *hash;
	unsigned int size, i;

	*vmalloced = 0;

	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
	hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
				       get_order(sizeof(struct hlist_head)
						 * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct hlist_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);

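A short sketch of the intended pairing of this allocator with nf_ct_free_hashtable() above; the expectation table elsewhere in conntrack is set up in the same way. The example_* names are illustrative assumptions.

/* Illustrative user of the alloc/free pair.  Note that the requested
 * size is rounded up to a whole number of pages worth of buckets, so
 * example_hsize may come back larger than asked for (e.g. 600 buckets
 * become 1024 with 4K pages and 8-byte list heads). */
static struct hlist_head *example_hash;
static unsigned int example_hsize = 600;
static int example_vmalloced;

static int example_table_init(void)
{
	example_hash = nf_ct_alloc_hashtable(&example_hsize, &example_vmalloced);
	if (!example_hash)
		return -ENOMEM;
	return 0;
}

static void example_table_fini(void)
{
	nf_ct_free_hashtable(example_hash, example_vmalloced, example_hsize);
}
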
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, vmalloced, old_vmalloced;
	unsigned int hashsize, old_size;
	int rnd;
	struct hlist_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtoul(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though, since that requires taking the lock.
	 */
	spin_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_empty(&nf_conntrack_hash[i])) {
			h = hlist_entry(nf_conntrack_hash[i].first,
					struct nf_conntrack_tuple_hash, hnode);
			hlist_del_rcu(&h->hnode);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			hlist_add_head(&h->hnode, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = nf_conntrack_vmalloc;
	old_hash = nf_conntrack_hash;

	nf_conntrack_htable_size = hashsize;
	nf_conntrack_vmalloc = vmalloced;
	nf_conntrack_hash = hash;
	nf_conntrack_hash_rnd = rnd;
	spin_unlock_bh(&nf_conntrack_lock);

	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);

module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);

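With perm 0600 this appears as a root-writable parameter file (on a modular build, /sys/module/nf_conntrack/parameters/hashsize), and writes to it go through nf_conntrack_set_hashsize() above. For comparison, a minimal sketch of the same module_param_call() pattern with a hypothetical tunable and a range-checking setter; all example_* names and limits are assumptions:

/* Hypothetical tunable wired up like hashsize: a custom setter that
 * validates the value, paired with the stock param_get_uint getter. */
static unsigned int example_limit = 128;

static int example_set_limit(const char *val, struct kernel_param *kp)
{
	unsigned int n = simple_strtoul(val, NULL, 0);

	if (n < 16 || n > 65536)
		return -EINVAL;
	*(unsigned int *)kp->arg = n;
	return 0;
}

module_param_call(example_limit, example_set_limit, param_get_uint,
		  &example_limit, 0600);
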
int __init nf_conntrack_init(void)
{
	int max_factor = 8;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets.
	 * (A worked example follows this function.) */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
						  &nf_conntrack_vmalloc);
	if (!nf_conntrack_hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_out;
	}

	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
						sizeof(struct nf_conn),
						0, 0, NULL);
	if (!nf_conntrack_cachep) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_free_hash;
	}

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_free_conntrack_slab;

	ret = nf_conntrack_expect_init();
	if (ret < 0)
		goto out_fini_proto;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto out_fini_expect;

	/* For use by REJECT target */
	rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return ret;

out_fini_expect:
	nf_conntrack_expect_fini();
out_fini_proto:
	nf_conntrack_proto_fini();
err_free_conntrack_slab:
	kmem_cache_destroy(nf_conntrack_cachep);
err_free_hash:
	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc,
			     nf_conntrack_htable_size);
err_out:
	return -ENOMEM;
}

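The table-sizing heuristic at the top of nf_conntrack_init() works out as follows. The numbers assume the i386 values from the comment (4 KB pages, 4-byte struct hlist_head); the helper below is a userspace illustration written for this note, not kernel code.

/* Userspace sketch of the default table sizing in nf_conntrack_init().
 * Assumes PAGE_SIZE == 4096 and sizeof(struct hlist_head) == 4 (i386). */
#include <stdio.h>

static unsigned int buckets_for(unsigned long long ram_bytes)
{
	unsigned int page_size = 4096, head_size = 4;
	unsigned long long num_physpages = ram_bytes / page_size;
	unsigned int size = (num_physpages * page_size / 16384) / head_size;

	if (num_physpages > (1024ULL * 1024 * 1024 / page_size))
		size = 16384;		/* cap for machines above 1GB */
	if (size < 32)
		size = 32;		/* floor */
	return size;
}

int main(void)
{
	/* 32MB -> 512 buckets (2048 max entries with max_factor == 4),
	 * 1GB and above -> 16384 buckets (65536 max entries). */
	printf("32MB: %u buckets\n", buckets_for(32ULL << 20));
	printf("2GB:  %u buckets\n", buckets_for(2ULL << 30));
	return 0;
}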