Commit 08733a0cb7decce40bbbd0331a0449465f13c444

Authored by Pablo Neira Ayuso
Parent: 26dfab7216

netfilter: handle NF_REPEAT from nf_conntrack_in()

NF_REPEAT is only needed from nf_conntrack_in() for one very specific
case required by the TCP protocol tracker, so we can handle this case
without returning to the core hook path. Handling of NF_REPEAT from
nf_reinject() is left untouched.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
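
The nf_conntrack_core.c excerpt below stops before the hunk that actually absorbs NF_REPEAT, so here is a minimal user-space sketch of the control-flow change being described: the retry moves into the conntrack entry point itself, and the core hook walker no longer needs an NF_REPEAT case. All names in this sketch (run_hooks, conntrack_in, tcp_tracker and the NF_* constants) are stand-ins for illustration, not the kernel API.

/* Stand-alone sketch of the idea, not the actual patch hunks. */
#include <stdio.h>

enum { NF_ACCEPT, NF_DROP, NF_REPEAT };

static int tcp_tracker_calls;

/* Pretend TCP tracker: the first pass hits the "reopen a closed
 * connection" case and asks for a retry; the retry succeeds. */
static int tcp_tracker(void)
{
	return tcp_tracker_calls++ == 0 ? NF_REPEAT : NF_ACCEPT;
}

/* After this commit: the retry is handled inside the conntrack hook
 * itself, so NF_REPEAT never reaches the core hook path. */
static int conntrack_in(void)
{
repeat:
	if (tcp_tracker() == NF_REPEAT)
		goto repeat;	/* start over against a fresh conntrack */
	return NF_ACCEPT;
}

/* Core hook walker: the NF_REPEAT case can be dropped, as done in
 * nf_hook_slow() below. */
static int run_hooks(void)
{
	switch (conntrack_in()) {
	case NF_ACCEPT:
		return 1;	/* keep traversing / call okfn() */
	case NF_DROP:
		return -1;
	default:
		return 0;	/* stolen, queued, ... */
	}
}

int main(void)
{
	printf("verdict %d after %d tracker runs\n",
	       run_hooks(), tcp_tracker_calls);
	return 0;
}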

Showing 3 changed files with 8 additions and 13 deletions (inline diff)

net/netfilter/core.c
1 /* netfilter.c: look after the filters for various protocols. 1 /* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. 2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3 * 3 *
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any 4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5 * way. 5 * way.
6 * 6 *
7 * Rusty Russell (C)2000 -- This code is GPL. 7 * Rusty Russell (C)2000 -- This code is GPL.
8 * Patrick McHardy (c) 2006-2012 8 * Patrick McHardy (c) 2006-2012
9 */ 9 */
10 #include <linux/kernel.h> 10 #include <linux/kernel.h>
11 #include <linux/netfilter.h> 11 #include <linux/netfilter.h>
12 #include <net/protocol.h> 12 #include <net/protocol.h>
13 #include <linux/init.h> 13 #include <linux/init.h>
14 #include <linux/skbuff.h> 14 #include <linux/skbuff.h>
15 #include <linux/wait.h> 15 #include <linux/wait.h>
16 #include <linux/module.h> 16 #include <linux/module.h>
17 #include <linux/interrupt.h> 17 #include <linux/interrupt.h>
18 #include <linux/if.h> 18 #include <linux/if.h>
19 #include <linux/netdevice.h> 19 #include <linux/netdevice.h>
20 #include <linux/netfilter_ipv6.h> 20 #include <linux/netfilter_ipv6.h>
21 #include <linux/inetdevice.h> 21 #include <linux/inetdevice.h>
22 #include <linux/proc_fs.h> 22 #include <linux/proc_fs.h>
23 #include <linux/mutex.h> 23 #include <linux/mutex.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <linux/rcupdate.h> 25 #include <linux/rcupdate.h>
26 #include <net/net_namespace.h> 26 #include <net/net_namespace.h>
27 #include <net/sock.h> 27 #include <net/sock.h>
28 28
29 #include "nf_internals.h" 29 #include "nf_internals.h"
30 30
31 static DEFINE_MUTEX(afinfo_mutex); 31 static DEFINE_MUTEX(afinfo_mutex);
32 32
33 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; 33 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
34 EXPORT_SYMBOL(nf_afinfo); 34 EXPORT_SYMBOL(nf_afinfo);
35 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; 35 const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
36 EXPORT_SYMBOL_GPL(nf_ipv6_ops); 36 EXPORT_SYMBOL_GPL(nf_ipv6_ops);
37 37
38 DEFINE_PER_CPU(bool, nf_skb_duplicated); 38 DEFINE_PER_CPU(bool, nf_skb_duplicated);
39 EXPORT_SYMBOL_GPL(nf_skb_duplicated); 39 EXPORT_SYMBOL_GPL(nf_skb_duplicated);
40 40
41 int nf_register_afinfo(const struct nf_afinfo *afinfo) 41 int nf_register_afinfo(const struct nf_afinfo *afinfo)
42 { 42 {
43 mutex_lock(&afinfo_mutex); 43 mutex_lock(&afinfo_mutex);
44 RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo); 44 RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
45 mutex_unlock(&afinfo_mutex); 45 mutex_unlock(&afinfo_mutex);
46 return 0; 46 return 0;
47 } 47 }
48 EXPORT_SYMBOL_GPL(nf_register_afinfo); 48 EXPORT_SYMBOL_GPL(nf_register_afinfo);
49 49
50 void nf_unregister_afinfo(const struct nf_afinfo *afinfo) 50 void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
51 { 51 {
52 mutex_lock(&afinfo_mutex); 52 mutex_lock(&afinfo_mutex);
53 RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL); 53 RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
54 mutex_unlock(&afinfo_mutex); 54 mutex_unlock(&afinfo_mutex);
55 synchronize_rcu(); 55 synchronize_rcu();
56 } 56 }
57 EXPORT_SYMBOL_GPL(nf_unregister_afinfo); 57 EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
58 58
59 #ifdef HAVE_JUMP_LABEL 59 #ifdef HAVE_JUMP_LABEL
60 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; 60 struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
61 EXPORT_SYMBOL(nf_hooks_needed); 61 EXPORT_SYMBOL(nf_hooks_needed);
62 #endif 62 #endif
63 63
64 static DEFINE_MUTEX(nf_hook_mutex); 64 static DEFINE_MUTEX(nf_hook_mutex);
65 #define nf_entry_dereference(e) \ 65 #define nf_entry_dereference(e) \
66 rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) 66 rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
67 67
68 static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) 68 static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
69 { 69 {
70 if (reg->pf != NFPROTO_NETDEV) 70 if (reg->pf != NFPROTO_NETDEV)
71 return net->nf.hooks[reg->pf]+reg->hooknum; 71 return net->nf.hooks[reg->pf]+reg->hooknum;
72 72
73 #ifdef CONFIG_NETFILTER_INGRESS 73 #ifdef CONFIG_NETFILTER_INGRESS
74 if (reg->hooknum == NF_NETDEV_INGRESS) { 74 if (reg->hooknum == NF_NETDEV_INGRESS) {
75 if (reg->dev && dev_net(reg->dev) == net) 75 if (reg->dev && dev_net(reg->dev) == net)
76 return &reg->dev->nf_hooks_ingress; 76 return &reg->dev->nf_hooks_ingress;
77 } 77 }
78 #endif 78 #endif
79 return NULL; 79 return NULL;
80 } 80 }
81 81
82 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) 82 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
83 { 83 {
84 struct nf_hook_entry __rcu **pp; 84 struct nf_hook_entry __rcu **pp;
85 struct nf_hook_entry *entry, *p; 85 struct nf_hook_entry *entry, *p;
86 86
87 if (reg->pf == NFPROTO_NETDEV) { 87 if (reg->pf == NFPROTO_NETDEV) {
88 #ifndef CONFIG_NETFILTER_INGRESS 88 #ifndef CONFIG_NETFILTER_INGRESS
89 if (reg->hooknum == NF_NETDEV_INGRESS) 89 if (reg->hooknum == NF_NETDEV_INGRESS)
90 return -EOPNOTSUPP; 90 return -EOPNOTSUPP;
91 #endif 91 #endif
92 if (reg->hooknum != NF_NETDEV_INGRESS || 92 if (reg->hooknum != NF_NETDEV_INGRESS ||
93 !reg->dev || dev_net(reg->dev) != net) 93 !reg->dev || dev_net(reg->dev) != net)
94 return -EINVAL; 94 return -EINVAL;
95 } 95 }
96 96
97 pp = nf_hook_entry_head(net, reg); 97 pp = nf_hook_entry_head(net, reg);
98 if (!pp) 98 if (!pp)
99 return -EINVAL; 99 return -EINVAL;
100 100
101 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 101 entry = kmalloc(sizeof(*entry), GFP_KERNEL);
102 if (!entry) 102 if (!entry)
103 return -ENOMEM; 103 return -ENOMEM;
104 104
105 entry->orig_ops = reg; 105 entry->orig_ops = reg;
106 entry->ops = *reg; 106 entry->ops = *reg;
107 entry->next = NULL; 107 entry->next = NULL;
108 108
109 mutex_lock(&nf_hook_mutex); 109 mutex_lock(&nf_hook_mutex);
110 110
111 /* Find the spot in the list */ 111 /* Find the spot in the list */
112 while ((p = nf_entry_dereference(*pp)) != NULL) { 112 while ((p = nf_entry_dereference(*pp)) != NULL) {
113 if (reg->priority < p->orig_ops->priority) 113 if (reg->priority < p->orig_ops->priority)
114 break; 114 break;
115 pp = &p->next; 115 pp = &p->next;
116 } 116 }
117 rcu_assign_pointer(entry->next, p); 117 rcu_assign_pointer(entry->next, p);
118 rcu_assign_pointer(*pp, entry); 118 rcu_assign_pointer(*pp, entry);
119 119
120 mutex_unlock(&nf_hook_mutex); 120 mutex_unlock(&nf_hook_mutex);
121 #ifdef CONFIG_NETFILTER_INGRESS 121 #ifdef CONFIG_NETFILTER_INGRESS
122 if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) 122 if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
123 net_inc_ingress_queue(); 123 net_inc_ingress_queue();
124 #endif 124 #endif
125 #ifdef HAVE_JUMP_LABEL 125 #ifdef HAVE_JUMP_LABEL
126 static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); 126 static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
127 #endif 127 #endif
128 return 0; 128 return 0;
129 } 129 }
130 EXPORT_SYMBOL(nf_register_net_hook); 130 EXPORT_SYMBOL(nf_register_net_hook);
131 131
132 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) 132 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
133 { 133 {
134 struct nf_hook_entry __rcu **pp; 134 struct nf_hook_entry __rcu **pp;
135 struct nf_hook_entry *p; 135 struct nf_hook_entry *p;
136 136
137 pp = nf_hook_entry_head(net, reg); 137 pp = nf_hook_entry_head(net, reg);
138 if (WARN_ON_ONCE(!pp)) 138 if (WARN_ON_ONCE(!pp))
139 return; 139 return;
140 140
141 mutex_lock(&nf_hook_mutex); 141 mutex_lock(&nf_hook_mutex);
142 while ((p = nf_entry_dereference(*pp)) != NULL) { 142 while ((p = nf_entry_dereference(*pp)) != NULL) {
143 if (p->orig_ops == reg) { 143 if (p->orig_ops == reg) {
144 rcu_assign_pointer(*pp, p->next); 144 rcu_assign_pointer(*pp, p->next);
145 break; 145 break;
146 } 146 }
147 pp = &p->next; 147 pp = &p->next;
148 } 148 }
149 mutex_unlock(&nf_hook_mutex); 149 mutex_unlock(&nf_hook_mutex);
150 if (!p) { 150 if (!p) {
151 WARN(1, "nf_unregister_net_hook: hook not found!\n"); 151 WARN(1, "nf_unregister_net_hook: hook not found!\n");
152 return; 152 return;
153 } 153 }
154 #ifdef CONFIG_NETFILTER_INGRESS 154 #ifdef CONFIG_NETFILTER_INGRESS
155 if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) 155 if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
156 net_dec_ingress_queue(); 156 net_dec_ingress_queue();
157 #endif 157 #endif
158 #ifdef HAVE_JUMP_LABEL 158 #ifdef HAVE_JUMP_LABEL
159 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); 159 static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
160 #endif 160 #endif
161 synchronize_net(); 161 synchronize_net();
162 nf_queue_nf_hook_drop(net, p); 162 nf_queue_nf_hook_drop(net, p);
163 /* other cpu might still process nfqueue verdict that used reg */ 163 /* other cpu might still process nfqueue verdict that used reg */
164 synchronize_net(); 164 synchronize_net();
165 kfree(p); 165 kfree(p);
166 } 166 }
167 EXPORT_SYMBOL(nf_unregister_net_hook); 167 EXPORT_SYMBOL(nf_unregister_net_hook);
168 168
169 int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, 169 int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
170 unsigned int n) 170 unsigned int n)
171 { 171 {
172 unsigned int i; 172 unsigned int i;
173 int err = 0; 173 int err = 0;
174 174
175 for (i = 0; i < n; i++) { 175 for (i = 0; i < n; i++) {
176 err = nf_register_net_hook(net, &reg[i]); 176 err = nf_register_net_hook(net, &reg[i]);
177 if (err) 177 if (err)
178 goto err; 178 goto err;
179 } 179 }
180 return err; 180 return err;
181 181
182 err: 182 err:
183 if (i > 0) 183 if (i > 0)
184 nf_unregister_net_hooks(net, reg, i); 184 nf_unregister_net_hooks(net, reg, i);
185 return err; 185 return err;
186 } 186 }
187 EXPORT_SYMBOL(nf_register_net_hooks); 187 EXPORT_SYMBOL(nf_register_net_hooks);
188 188
189 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, 189 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
190 unsigned int n) 190 unsigned int n)
191 { 191 {
192 while (n-- > 0) 192 while (n-- > 0)
193 nf_unregister_net_hook(net, &reg[n]); 193 nf_unregister_net_hook(net, &reg[n]);
194 } 194 }
195 EXPORT_SYMBOL(nf_unregister_net_hooks); 195 EXPORT_SYMBOL(nf_unregister_net_hooks);
196 196
197 static LIST_HEAD(nf_hook_list); 197 static LIST_HEAD(nf_hook_list);
198 198
199 static int _nf_register_hook(struct nf_hook_ops *reg) 199 static int _nf_register_hook(struct nf_hook_ops *reg)
200 { 200 {
201 struct net *net, *last; 201 struct net *net, *last;
202 int ret; 202 int ret;
203 203
204 for_each_net(net) { 204 for_each_net(net) {
205 ret = nf_register_net_hook(net, reg); 205 ret = nf_register_net_hook(net, reg);
206 if (ret && ret != -ENOENT) 206 if (ret && ret != -ENOENT)
207 goto rollback; 207 goto rollback;
208 } 208 }
209 list_add_tail(&reg->list, &nf_hook_list); 209 list_add_tail(&reg->list, &nf_hook_list);
210 210
211 return 0; 211 return 0;
212 rollback: 212 rollback:
213 last = net; 213 last = net;
214 for_each_net(net) { 214 for_each_net(net) {
215 if (net == last) 215 if (net == last)
216 break; 216 break;
217 nf_unregister_net_hook(net, reg); 217 nf_unregister_net_hook(net, reg);
218 } 218 }
219 return ret; 219 return ret;
220 } 220 }
221 221
222 int nf_register_hook(struct nf_hook_ops *reg) 222 int nf_register_hook(struct nf_hook_ops *reg)
223 { 223 {
224 int ret; 224 int ret;
225 225
226 rtnl_lock(); 226 rtnl_lock();
227 ret = _nf_register_hook(reg); 227 ret = _nf_register_hook(reg);
228 rtnl_unlock(); 228 rtnl_unlock();
229 229
230 return ret; 230 return ret;
231 } 231 }
232 EXPORT_SYMBOL(nf_register_hook); 232 EXPORT_SYMBOL(nf_register_hook);
233 233
234 static void _nf_unregister_hook(struct nf_hook_ops *reg) 234 static void _nf_unregister_hook(struct nf_hook_ops *reg)
235 { 235 {
236 struct net *net; 236 struct net *net;
237 237
238 list_del(&reg->list); 238 list_del(&reg->list);
239 for_each_net(net) 239 for_each_net(net)
240 nf_unregister_net_hook(net, reg); 240 nf_unregister_net_hook(net, reg);
241 } 241 }
242 242
243 void nf_unregister_hook(struct nf_hook_ops *reg) 243 void nf_unregister_hook(struct nf_hook_ops *reg)
244 { 244 {
245 rtnl_lock(); 245 rtnl_lock();
246 _nf_unregister_hook(reg); 246 _nf_unregister_hook(reg);
247 rtnl_unlock(); 247 rtnl_unlock();
248 } 248 }
249 EXPORT_SYMBOL(nf_unregister_hook); 249 EXPORT_SYMBOL(nf_unregister_hook);
250 250
251 int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) 251 int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
252 { 252 {
253 unsigned int i; 253 unsigned int i;
254 int err = 0; 254 int err = 0;
255 255
256 for (i = 0; i < n; i++) { 256 for (i = 0; i < n; i++) {
257 err = nf_register_hook(&reg[i]); 257 err = nf_register_hook(&reg[i]);
258 if (err) 258 if (err)
259 goto err; 259 goto err;
260 } 260 }
261 return err; 261 return err;
262 262
263 err: 263 err:
264 if (i > 0) 264 if (i > 0)
265 nf_unregister_hooks(reg, i); 265 nf_unregister_hooks(reg, i);
266 return err; 266 return err;
267 } 267 }
268 EXPORT_SYMBOL(nf_register_hooks); 268 EXPORT_SYMBOL(nf_register_hooks);
269 269
270 /* Caller MUST take rtnl_lock() */ 270 /* Caller MUST take rtnl_lock() */
271 int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) 271 int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
272 { 272 {
273 unsigned int i; 273 unsigned int i;
274 int err = 0; 274 int err = 0;
275 275
276 for (i = 0; i < n; i++) { 276 for (i = 0; i < n; i++) {
277 err = _nf_register_hook(&reg[i]); 277 err = _nf_register_hook(&reg[i]);
278 if (err) 278 if (err)
279 goto err; 279 goto err;
280 } 280 }
281 return err; 281 return err;
282 282
283 err: 283 err:
284 if (i > 0) 284 if (i > 0)
285 _nf_unregister_hooks(reg, i); 285 _nf_unregister_hooks(reg, i);
286 return err; 286 return err;
287 } 287 }
288 EXPORT_SYMBOL(_nf_register_hooks); 288 EXPORT_SYMBOL(_nf_register_hooks);
289 289
290 void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) 290 void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
291 { 291 {
292 while (n-- > 0) 292 while (n-- > 0)
293 nf_unregister_hook(&reg[n]); 293 nf_unregister_hook(&reg[n]);
294 } 294 }
295 EXPORT_SYMBOL(nf_unregister_hooks); 295 EXPORT_SYMBOL(nf_unregister_hooks);
296 296
297 /* Caller MUST take rtnl_lock */ 297 /* Caller MUST take rtnl_lock */
298 void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) 298 void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
299 { 299 {
300 while (n-- > 0) 300 while (n-- > 0)
301 _nf_unregister_hook(&reg[n]); 301 _nf_unregister_hook(&reg[n]);
302 } 302 }
303 EXPORT_SYMBOL(_nf_unregister_hooks); 303 EXPORT_SYMBOL(_nf_unregister_hooks);
304 304
305 /* Returns 1 if okfn() needs to be executed by the caller, 305 /* Returns 1 if okfn() needs to be executed by the caller,
306 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ 306 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
307 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, 307 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
308 struct nf_hook_entry *entry) 308 struct nf_hook_entry *entry)
309 { 309 {
310 unsigned int verdict; 310 unsigned int verdict;
311 int ret; 311 int ret;
312 312
313 do { 313 do {
314 verdict = entry->ops.hook(entry->ops.priv, skb, state); 314 verdict = entry->ops.hook(entry->ops.priv, skb, state);
315 switch (verdict & NF_VERDICT_MASK) { 315 switch (verdict & NF_VERDICT_MASK) {
316 case NF_ACCEPT: 316 case NF_ACCEPT:
317 entry = rcu_dereference(entry->next); 317 entry = rcu_dereference(entry->next);
318 break; 318 break;
319 case NF_DROP: 319 case NF_DROP:
320 kfree_skb(skb); 320 kfree_skb(skb);
321 ret = NF_DROP_GETERR(verdict); 321 ret = NF_DROP_GETERR(verdict);
322 if (ret == 0) 322 if (ret == 0)
323 ret = -EPERM; 323 ret = -EPERM;
324 return ret; 324 return ret;
325 case NF_REPEAT: (removed by this commit)
326 continue; (removed by this commit)
327 case NF_QUEUE: 325 case NF_QUEUE:
328 ret = nf_queue(skb, state, &entry, verdict); 326 ret = nf_queue(skb, state, &entry, verdict);
329 if (ret == 1 && entry) 327 if (ret == 1 && entry)
330 continue; 328 continue;
331 return ret; 329 return ret;
332 default: 330 default:
333 /* Implicit handling for NF_STOLEN, as well as any other 331 /* Implicit handling for NF_STOLEN, as well as any other
334 * non conventional verdicts. 332 * non conventional verdicts.
335 */ 333 */
336 return 0; 334 return 0;
337 } 335 }
338 } while (entry); 336 } while (entry);
339 337
340 return 1; 338 return 1;
341 } 339 }
342 EXPORT_SYMBOL(nf_hook_slow); 340 EXPORT_SYMBOL(nf_hook_slow);
343 341
344 342
345 int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) 343 int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
346 { 344 {
347 if (writable_len > skb->len) 345 if (writable_len > skb->len)
348 return 0; 346 return 0;
349 347
350 /* Not exclusive use of packet? Must copy. */ 348 /* Not exclusive use of packet? Must copy. */
351 if (!skb_cloned(skb)) { 349 if (!skb_cloned(skb)) {
352 if (writable_len <= skb_headlen(skb)) 350 if (writable_len <= skb_headlen(skb))
353 return 1; 351 return 1;
354 } else if (skb_clone_writable(skb, writable_len)) 352 } else if (skb_clone_writable(skb, writable_len))
355 return 1; 353 return 1;
356 354
357 if (writable_len <= skb_headlen(skb)) 355 if (writable_len <= skb_headlen(skb))
358 writable_len = 0; 356 writable_len = 0;
359 else 357 else
360 writable_len -= skb_headlen(skb); 358 writable_len -= skb_headlen(skb);
361 359
362 return !!__pskb_pull_tail(skb, writable_len); 360 return !!__pskb_pull_tail(skb, writable_len);
363 } 361 }
364 EXPORT_SYMBOL(skb_make_writable); 362 EXPORT_SYMBOL(skb_make_writable);
365 363
366 /* This needs to be compiled in any case to avoid dependencies between the 364 /* This needs to be compiled in any case to avoid dependencies between the
367 * nfnetlink_queue code and nf_conntrack. 365 * nfnetlink_queue code and nf_conntrack.
368 */ 366 */
369 struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; 367 struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
370 EXPORT_SYMBOL_GPL(nfnl_ct_hook); 368 EXPORT_SYMBOL_GPL(nfnl_ct_hook);
371 369
372 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 370 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
373 /* This does not belong here, but locally generated errors need it if connection 371 /* This does not belong here, but locally generated errors need it if connection
374 tracking in use: without this, connection may not be in hash table, and hence 372 tracking in use: without this, connection may not be in hash table, and hence
375 manufactured ICMP or RST packets will not be associated with it. */ 373 manufactured ICMP or RST packets will not be associated with it. */
376 void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) 374 void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
377 __rcu __read_mostly; 375 __rcu __read_mostly;
378 EXPORT_SYMBOL(ip_ct_attach); 376 EXPORT_SYMBOL(ip_ct_attach);
379 377
380 void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb) 378 void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
381 { 379 {
382 void (*attach)(struct sk_buff *, const struct sk_buff *); 380 void (*attach)(struct sk_buff *, const struct sk_buff *);
383 381
384 if (skb->nfct) { 382 if (skb->nfct) {
385 rcu_read_lock(); 383 rcu_read_lock();
386 attach = rcu_dereference(ip_ct_attach); 384 attach = rcu_dereference(ip_ct_attach);
387 if (attach) 385 if (attach)
388 attach(new, skb); 386 attach(new, skb);
389 rcu_read_unlock(); 387 rcu_read_unlock();
390 } 388 }
391 } 389 }
392 EXPORT_SYMBOL(nf_ct_attach); 390 EXPORT_SYMBOL(nf_ct_attach);
393 391
394 void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; 392 void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
395 EXPORT_SYMBOL(nf_ct_destroy); 393 EXPORT_SYMBOL(nf_ct_destroy);
396 394
397 void nf_conntrack_destroy(struct nf_conntrack *nfct) 395 void nf_conntrack_destroy(struct nf_conntrack *nfct)
398 { 396 {
399 void (*destroy)(struct nf_conntrack *); 397 void (*destroy)(struct nf_conntrack *);
400 398
401 rcu_read_lock(); 399 rcu_read_lock();
402 destroy = rcu_dereference(nf_ct_destroy); 400 destroy = rcu_dereference(nf_ct_destroy);
403 BUG_ON(destroy == NULL); 401 BUG_ON(destroy == NULL);
404 destroy(nfct); 402 destroy(nfct);
405 rcu_read_unlock(); 403 rcu_read_unlock();
406 } 404 }
407 EXPORT_SYMBOL(nf_conntrack_destroy); 405 EXPORT_SYMBOL(nf_conntrack_destroy);
408 406
409 /* Built-in default zone used e.g. by modules. */ 407 /* Built-in default zone used e.g. by modules. */
410 const struct nf_conntrack_zone nf_ct_zone_dflt = { 408 const struct nf_conntrack_zone nf_ct_zone_dflt = {
411 .id = NF_CT_DEFAULT_ZONE_ID, 409 .id = NF_CT_DEFAULT_ZONE_ID,
412 .dir = NF_CT_DEFAULT_ZONE_DIR, 410 .dir = NF_CT_DEFAULT_ZONE_DIR,
413 }; 411 };
414 EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); 412 EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
415 #endif /* CONFIG_NF_CONNTRACK */ 413 #endif /* CONFIG_NF_CONNTRACK */
416 414
417 #ifdef CONFIG_NF_NAT_NEEDED 415 #ifdef CONFIG_NF_NAT_NEEDED
418 void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); 416 void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
419 EXPORT_SYMBOL(nf_nat_decode_session_hook); 417 EXPORT_SYMBOL(nf_nat_decode_session_hook);
420 #endif 418 #endif
421 419
422 static int nf_register_hook_list(struct net *net) 420 static int nf_register_hook_list(struct net *net)
423 { 421 {
424 struct nf_hook_ops *elem; 422 struct nf_hook_ops *elem;
425 int ret; 423 int ret;
426 424
427 rtnl_lock(); 425 rtnl_lock();
428 list_for_each_entry(elem, &nf_hook_list, list) { 426 list_for_each_entry(elem, &nf_hook_list, list) {
429 ret = nf_register_net_hook(net, elem); 427 ret = nf_register_net_hook(net, elem);
430 if (ret && ret != -ENOENT) 428 if (ret && ret != -ENOENT)
431 goto out_undo; 429 goto out_undo;
432 } 430 }
433 rtnl_unlock(); 431 rtnl_unlock();
434 return 0; 432 return 0;
435 433
436 out_undo: 434 out_undo:
437 list_for_each_entry_continue_reverse(elem, &nf_hook_list, list) 435 list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
438 nf_unregister_net_hook(net, elem); 436 nf_unregister_net_hook(net, elem);
439 rtnl_unlock(); 437 rtnl_unlock();
440 return ret; 438 return ret;
441 } 439 }
442 440
443 static void nf_unregister_hook_list(struct net *net) 441 static void nf_unregister_hook_list(struct net *net)
444 { 442 {
445 struct nf_hook_ops *elem; 443 struct nf_hook_ops *elem;
446 444
447 rtnl_lock(); 445 rtnl_lock();
448 list_for_each_entry(elem, &nf_hook_list, list) 446 list_for_each_entry(elem, &nf_hook_list, list)
449 nf_unregister_net_hook(net, elem); 447 nf_unregister_net_hook(net, elem);
450 rtnl_unlock(); 448 rtnl_unlock();
451 } 449 }
452 450
453 static int __net_init netfilter_net_init(struct net *net) 451 static int __net_init netfilter_net_init(struct net *net)
454 { 452 {
455 int i, h, ret; 453 int i, h, ret;
456 454
457 for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { 455 for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
458 for (h = 0; h < NF_MAX_HOOKS; h++) 456 for (h = 0; h < NF_MAX_HOOKS; h++)
459 RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); 457 RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
460 } 458 }
461 459
462 #ifdef CONFIG_PROC_FS 460 #ifdef CONFIG_PROC_FS
463 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", 461 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
464 net->proc_net); 462 net->proc_net);
465 if (!net->nf.proc_netfilter) { 463 if (!net->nf.proc_netfilter) {
466 if (!net_eq(net, &init_net)) 464 if (!net_eq(net, &init_net))
467 pr_err("cannot create netfilter proc entry"); 465 pr_err("cannot create netfilter proc entry");
468 466
469 return -ENOMEM; 467 return -ENOMEM;
470 } 468 }
471 #endif 469 #endif
472 ret = nf_register_hook_list(net); 470 ret = nf_register_hook_list(net);
473 if (ret) 471 if (ret)
474 remove_proc_entry("netfilter", net->proc_net); 472 remove_proc_entry("netfilter", net->proc_net);
475 473
476 return ret; 474 return ret;
477 } 475 }
478 476
479 static void __net_exit netfilter_net_exit(struct net *net) 477 static void __net_exit netfilter_net_exit(struct net *net)
480 { 478 {
481 nf_unregister_hook_list(net); 479 nf_unregister_hook_list(net);
482 remove_proc_entry("netfilter", net->proc_net); 480 remove_proc_entry("netfilter", net->proc_net);
483 } 481 }
484 482
485 static struct pernet_operations netfilter_net_ops = { 483 static struct pernet_operations netfilter_net_ops = {
486 .init = netfilter_net_init, 484 .init = netfilter_net_init,
487 .exit = netfilter_net_exit, 485 .exit = netfilter_net_exit,
488 }; 486 };
489 487
490 int __init netfilter_init(void) 488 int __init netfilter_init(void)
491 { 489 {
492 int ret; 490 int ret;
493 491
494 ret = register_pernet_subsys(&netfilter_net_ops); 492 ret = register_pernet_subsys(&netfilter_net_ops);
495 if (ret < 0) 493 if (ret < 0)
496 goto err; 494 goto err;
497 495
498 ret = netfilter_log_init(); 496 ret = netfilter_log_init();
499 if (ret < 0) 497 if (ret < 0)
500 goto err_pernet; 498 goto err_pernet;
501 499
502 return 0; 500 return 0;
503 err_pernet: 501 err_pernet:
504 unregister_pernet_subsys(&netfilter_net_ops); 502 unregister_pernet_subsys(&netfilter_net_ops);
505 err: 503 err:
506 return ret; 504 return ret;
507 } 505 }
508 506
net/netfilter/nf_conntrack_core.c
1 /* Connection state tracking for netfilter. This is separated from, 1 /* Connection state tracking for netfilter. This is separated from,
2 but required by, the NAT layer; it can also be used by an iptables 2 but required by, the NAT layer; it can also be used by an iptables
3 extension. */ 3 extension. */
4 4
5 /* (C) 1999-2001 Paul `Rusty' Russell 5 /* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net> 8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as 11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 16
17 #include <linux/types.h> 17 #include <linux/types.h>
18 #include <linux/netfilter.h> 18 #include <linux/netfilter.h>
19 #include <linux/module.h> 19 #include <linux/module.h>
20 #include <linux/sched.h> 20 #include <linux/sched.h>
21 #include <linux/skbuff.h> 21 #include <linux/skbuff.h>
22 #include <linux/proc_fs.h> 22 #include <linux/proc_fs.h>
23 #include <linux/vmalloc.h> 23 #include <linux/vmalloc.h>
24 #include <linux/stddef.h> 24 #include <linux/stddef.h>
25 #include <linux/slab.h> 25 #include <linux/slab.h>
26 #include <linux/random.h> 26 #include <linux/random.h>
27 #include <linux/jhash.h> 27 #include <linux/jhash.h>
28 #include <linux/err.h> 28 #include <linux/err.h>
29 #include <linux/percpu.h> 29 #include <linux/percpu.h>
30 #include <linux/moduleparam.h> 30 #include <linux/moduleparam.h>
31 #include <linux/notifier.h> 31 #include <linux/notifier.h>
32 #include <linux/kernel.h> 32 #include <linux/kernel.h>
33 #include <linux/netdevice.h> 33 #include <linux/netdevice.h>
34 #include <linux/socket.h> 34 #include <linux/socket.h>
35 #include <linux/mm.h> 35 #include <linux/mm.h>
36 #include <linux/nsproxy.h> 36 #include <linux/nsproxy.h>
37 #include <linux/rculist_nulls.h> 37 #include <linux/rculist_nulls.h>
38 38
39 #include <net/netfilter/nf_conntrack.h> 39 #include <net/netfilter/nf_conntrack.h>
40 #include <net/netfilter/nf_conntrack_l3proto.h> 40 #include <net/netfilter/nf_conntrack_l3proto.h>
41 #include <net/netfilter/nf_conntrack_l4proto.h> 41 #include <net/netfilter/nf_conntrack_l4proto.h>
42 #include <net/netfilter/nf_conntrack_expect.h> 42 #include <net/netfilter/nf_conntrack_expect.h>
43 #include <net/netfilter/nf_conntrack_helper.h> 43 #include <net/netfilter/nf_conntrack_helper.h>
44 #include <net/netfilter/nf_conntrack_seqadj.h> 44 #include <net/netfilter/nf_conntrack_seqadj.h>
45 #include <net/netfilter/nf_conntrack_core.h> 45 #include <net/netfilter/nf_conntrack_core.h>
46 #include <net/netfilter/nf_conntrack_extend.h> 46 #include <net/netfilter/nf_conntrack_extend.h>
47 #include <net/netfilter/nf_conntrack_acct.h> 47 #include <net/netfilter/nf_conntrack_acct.h>
48 #include <net/netfilter/nf_conntrack_ecache.h> 48 #include <net/netfilter/nf_conntrack_ecache.h>
49 #include <net/netfilter/nf_conntrack_zones.h> 49 #include <net/netfilter/nf_conntrack_zones.h>
50 #include <net/netfilter/nf_conntrack_timestamp.h> 50 #include <net/netfilter/nf_conntrack_timestamp.h>
51 #include <net/netfilter/nf_conntrack_timeout.h> 51 #include <net/netfilter/nf_conntrack_timeout.h>
52 #include <net/netfilter/nf_conntrack_labels.h> 52 #include <net/netfilter/nf_conntrack_labels.h>
53 #include <net/netfilter/nf_conntrack_synproxy.h> 53 #include <net/netfilter/nf_conntrack_synproxy.h>
54 #include <net/netfilter/nf_nat.h> 54 #include <net/netfilter/nf_nat.h>
55 #include <net/netfilter/nf_nat_core.h> 55 #include <net/netfilter/nf_nat_core.h>
56 #include <net/netfilter/nf_nat_helper.h> 56 #include <net/netfilter/nf_nat_helper.h>
57 #include <net/netns/hash.h> 57 #include <net/netns/hash.h>
58 58
59 #define NF_CONNTRACK_VERSION "0.5.0" 59 #define NF_CONNTRACK_VERSION "0.5.0"
60 60
61 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, 61 int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
62 enum nf_nat_manip_type manip, 62 enum nf_nat_manip_type manip,
63 const struct nlattr *attr) __read_mostly; 63 const struct nlattr *attr) __read_mostly;
64 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); 64 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
65 65
66 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; 66 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
67 EXPORT_SYMBOL_GPL(nf_conntrack_locks); 67 EXPORT_SYMBOL_GPL(nf_conntrack_locks);
68 68
69 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 69 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
70 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 70 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
71 71
72 struct hlist_nulls_head *nf_conntrack_hash __read_mostly; 72 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
73 EXPORT_SYMBOL_GPL(nf_conntrack_hash); 73 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
74 74
75 struct conntrack_gc_work { 75 struct conntrack_gc_work {
76 struct delayed_work dwork; 76 struct delayed_work dwork;
77 u32 last_bucket; 77 u32 last_bucket;
78 bool exiting; 78 bool exiting;
79 }; 79 };
80 80
81 static __read_mostly struct kmem_cache *nf_conntrack_cachep; 81 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
82 static __read_mostly spinlock_t nf_conntrack_locks_all_lock; 82 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
83 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); 83 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
84 static __read_mostly bool nf_conntrack_locks_all; 84 static __read_mostly bool nf_conntrack_locks_all;
85 85
86 #define GC_MAX_BUCKETS_DIV 64u 86 #define GC_MAX_BUCKETS_DIV 64u
87 #define GC_MAX_BUCKETS 8192u 87 #define GC_MAX_BUCKETS 8192u
88 #define GC_INTERVAL (5 * HZ) 88 #define GC_INTERVAL (5 * HZ)
89 #define GC_MAX_EVICTS 256u 89 #define GC_MAX_EVICTS 256u
90 90
91 static struct conntrack_gc_work conntrack_gc_work; 91 static struct conntrack_gc_work conntrack_gc_work;
92 92
93 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) 93 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
94 { 94 {
95 spin_lock(lock); 95 spin_lock(lock);
96 while (unlikely(nf_conntrack_locks_all)) { 96 while (unlikely(nf_conntrack_locks_all)) {
97 spin_unlock(lock); 97 spin_unlock(lock);
98 98
99 /* 99 /*
100 * Order the 'nf_conntrack_locks_all' load vs. the 100 * Order the 'nf_conntrack_locks_all' load vs. the
101 * spin_unlock_wait() loads below, to ensure 101 * spin_unlock_wait() loads below, to ensure
102 * that 'nf_conntrack_locks_all_lock' is indeed held: 102 * that 'nf_conntrack_locks_all_lock' is indeed held:
103 */ 103 */
104 smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ 104 smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
105 spin_unlock_wait(&nf_conntrack_locks_all_lock); 105 spin_unlock_wait(&nf_conntrack_locks_all_lock);
106 spin_lock(lock); 106 spin_lock(lock);
107 } 107 }
108 } 108 }
109 EXPORT_SYMBOL_GPL(nf_conntrack_lock); 109 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
110 110
111 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) 111 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
112 { 112 {
113 h1 %= CONNTRACK_LOCKS; 113 h1 %= CONNTRACK_LOCKS;
114 h2 %= CONNTRACK_LOCKS; 114 h2 %= CONNTRACK_LOCKS;
115 spin_unlock(&nf_conntrack_locks[h1]); 115 spin_unlock(&nf_conntrack_locks[h1]);
116 if (h1 != h2) 116 if (h1 != h2)
117 spin_unlock(&nf_conntrack_locks[h2]); 117 spin_unlock(&nf_conntrack_locks[h2]);
118 } 118 }
119 119
120 /* return true if we need to recompute hashes (in case hash table was resized) */ 120 /* return true if we need to recompute hashes (in case hash table was resized) */
121 static bool nf_conntrack_double_lock(struct net *net, unsigned int h1, 121 static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
122 unsigned int h2, unsigned int sequence) 122 unsigned int h2, unsigned int sequence)
123 { 123 {
124 h1 %= CONNTRACK_LOCKS; 124 h1 %= CONNTRACK_LOCKS;
125 h2 %= CONNTRACK_LOCKS; 125 h2 %= CONNTRACK_LOCKS;
126 if (h1 <= h2) { 126 if (h1 <= h2) {
127 nf_conntrack_lock(&nf_conntrack_locks[h1]); 127 nf_conntrack_lock(&nf_conntrack_locks[h1]);
128 if (h1 != h2) 128 if (h1 != h2)
129 spin_lock_nested(&nf_conntrack_locks[h2], 129 spin_lock_nested(&nf_conntrack_locks[h2],
130 SINGLE_DEPTH_NESTING); 130 SINGLE_DEPTH_NESTING);
131 } else { 131 } else {
132 nf_conntrack_lock(&nf_conntrack_locks[h2]); 132 nf_conntrack_lock(&nf_conntrack_locks[h2]);
133 spin_lock_nested(&nf_conntrack_locks[h1], 133 spin_lock_nested(&nf_conntrack_locks[h1],
134 SINGLE_DEPTH_NESTING); 134 SINGLE_DEPTH_NESTING);
135 } 135 }
136 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { 136 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
137 nf_conntrack_double_unlock(h1, h2); 137 nf_conntrack_double_unlock(h1, h2);
138 return true; 138 return true;
139 } 139 }
140 return false; 140 return false;
141 } 141 }
142 142
143 static void nf_conntrack_all_lock(void) 143 static void nf_conntrack_all_lock(void)
144 { 144 {
145 int i; 145 int i;
146 146
147 spin_lock(&nf_conntrack_locks_all_lock); 147 spin_lock(&nf_conntrack_locks_all_lock);
148 nf_conntrack_locks_all = true; 148 nf_conntrack_locks_all = true;
149 149
150 /* 150 /*
151 * Order the above store of 'nf_conntrack_locks_all' against 151 * Order the above store of 'nf_conntrack_locks_all' against
152 * the spin_unlock_wait() loads below, such that if 152 * the spin_unlock_wait() loads below, such that if
153 * nf_conntrack_lock() observes 'nf_conntrack_locks_all' 153 * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
154 * we must observe nf_conntrack_locks[] held: 154 * we must observe nf_conntrack_locks[] held:
155 */ 155 */
156 smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ 156 smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
157 157
158 for (i = 0; i < CONNTRACK_LOCKS; i++) { 158 for (i = 0; i < CONNTRACK_LOCKS; i++) {
159 spin_unlock_wait(&nf_conntrack_locks[i]); 159 spin_unlock_wait(&nf_conntrack_locks[i]);
160 } 160 }
161 } 161 }
162 162
163 static void nf_conntrack_all_unlock(void) 163 static void nf_conntrack_all_unlock(void)
164 { 164 {
165 /* 165 /*
166 * All prior stores must be complete before we clear 166 * All prior stores must be complete before we clear
167 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() 167 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
168 * might observe the false value but not the entire 168 * might observe the false value but not the entire
169 * critical section: 169 * critical section:
170 */ 170 */
171 smp_store_release(&nf_conntrack_locks_all, false); 171 smp_store_release(&nf_conntrack_locks_all, false);
172 spin_unlock(&nf_conntrack_locks_all_lock); 172 spin_unlock(&nf_conntrack_locks_all_lock);
173 } 173 }
174 174
175 unsigned int nf_conntrack_htable_size __read_mostly; 175 unsigned int nf_conntrack_htable_size __read_mostly;
176 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); 176 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
177 177
178 unsigned int nf_conntrack_max __read_mostly; 178 unsigned int nf_conntrack_max __read_mostly;
179 seqcount_t nf_conntrack_generation __read_mostly; 179 seqcount_t nf_conntrack_generation __read_mostly;
180 180
181 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 181 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
182 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 182 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
183 183
184 static unsigned int nf_conntrack_hash_rnd __read_mostly; 184 static unsigned int nf_conntrack_hash_rnd __read_mostly;
185 185
186 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, 186 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
187 const struct net *net) 187 const struct net *net)
188 { 188 {
189 unsigned int n; 189 unsigned int n;
190 u32 seed; 190 u32 seed;
191 191
192 get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); 192 get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
193 193
194 /* The direction must be ignored, so we hash everything up to the 194 /* The direction must be ignored, so we hash everything up to the
195 * destination ports (which is a multiple of 4) and treat the last 195 * destination ports (which is a multiple of 4) and treat the last
196 * three bytes manually. 196 * three bytes manually.
197 */ 197 */
198 seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); 198 seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
199 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 199 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
200 return jhash2((u32 *)tuple, n, seed ^ 200 return jhash2((u32 *)tuple, n, seed ^
201 (((__force __u16)tuple->dst.u.all << 16) | 201 (((__force __u16)tuple->dst.u.all << 16) |
202 tuple->dst.protonum)); 202 tuple->dst.protonum));
203 } 203 }
204 204
205 static u32 scale_hash(u32 hash) 205 static u32 scale_hash(u32 hash)
206 { 206 {
207 return reciprocal_scale(hash, nf_conntrack_htable_size); 207 return reciprocal_scale(hash, nf_conntrack_htable_size);
208 } 208 }
209 209
210 static u32 __hash_conntrack(const struct net *net, 210 static u32 __hash_conntrack(const struct net *net,
211 const struct nf_conntrack_tuple *tuple, 211 const struct nf_conntrack_tuple *tuple,
212 unsigned int size) 212 unsigned int size)
213 { 213 {
214 return reciprocal_scale(hash_conntrack_raw(tuple, net), size); 214 return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
215 } 215 }
216 216
217 static u32 hash_conntrack(const struct net *net, 217 static u32 hash_conntrack(const struct net *net,
218 const struct nf_conntrack_tuple *tuple) 218 const struct nf_conntrack_tuple *tuple)
219 { 219 {
220 return scale_hash(hash_conntrack_raw(tuple, net)); 220 return scale_hash(hash_conntrack_raw(tuple, net));
221 } 221 }
222 222
223 bool 223 bool
224 nf_ct_get_tuple(const struct sk_buff *skb, 224 nf_ct_get_tuple(const struct sk_buff *skb,
225 unsigned int nhoff, 225 unsigned int nhoff,
226 unsigned int dataoff, 226 unsigned int dataoff,
227 u_int16_t l3num, 227 u_int16_t l3num,
228 u_int8_t protonum, 228 u_int8_t protonum,
229 struct net *net, 229 struct net *net,
230 struct nf_conntrack_tuple *tuple, 230 struct nf_conntrack_tuple *tuple,
231 const struct nf_conntrack_l3proto *l3proto, 231 const struct nf_conntrack_l3proto *l3proto,
232 const struct nf_conntrack_l4proto *l4proto) 232 const struct nf_conntrack_l4proto *l4proto)
233 { 233 {
234 memset(tuple, 0, sizeof(*tuple)); 234 memset(tuple, 0, sizeof(*tuple));
235 235
236 tuple->src.l3num = l3num; 236 tuple->src.l3num = l3num;
237 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) 237 if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
238 return false; 238 return false;
239 239
240 tuple->dst.protonum = protonum; 240 tuple->dst.protonum = protonum;
241 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 241 tuple->dst.dir = IP_CT_DIR_ORIGINAL;
242 242
243 return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); 243 return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
244 } 244 }
245 EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 245 EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
246 246
247 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 247 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
248 u_int16_t l3num, 248 u_int16_t l3num,
249 struct net *net, struct nf_conntrack_tuple *tuple) 249 struct net *net, struct nf_conntrack_tuple *tuple)
250 { 250 {
251 struct nf_conntrack_l3proto *l3proto; 251 struct nf_conntrack_l3proto *l3proto;
252 struct nf_conntrack_l4proto *l4proto; 252 struct nf_conntrack_l4proto *l4proto;
253 unsigned int protoff; 253 unsigned int protoff;
254 u_int8_t protonum; 254 u_int8_t protonum;
255 int ret; 255 int ret;
256 256
257 rcu_read_lock(); 257 rcu_read_lock();
258 258
259 l3proto = __nf_ct_l3proto_find(l3num); 259 l3proto = __nf_ct_l3proto_find(l3num);
260 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); 260 ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
261 if (ret != NF_ACCEPT) { 261 if (ret != NF_ACCEPT) {
262 rcu_read_unlock(); 262 rcu_read_unlock();
263 return false; 263 return false;
264 } 264 }
265 265
266 l4proto = __nf_ct_l4proto_find(l3num, protonum); 266 l4proto = __nf_ct_l4proto_find(l3num, protonum);
267 267
268 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, 268 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
269 l3proto, l4proto); 269 l3proto, l4proto);
270 270
271 rcu_read_unlock(); 271 rcu_read_unlock();
272 return ret; 272 return ret;
273 } 273 }
274 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); 274 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
275 275
276 bool 276 bool
277 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 277 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
278 const struct nf_conntrack_tuple *orig, 278 const struct nf_conntrack_tuple *orig,
279 const struct nf_conntrack_l3proto *l3proto, 279 const struct nf_conntrack_l3proto *l3proto,
280 const struct nf_conntrack_l4proto *l4proto) 280 const struct nf_conntrack_l4proto *l4proto)
281 { 281 {
282 memset(inverse, 0, sizeof(*inverse)); 282 memset(inverse, 0, sizeof(*inverse));
283 283
284 inverse->src.l3num = orig->src.l3num; 284 inverse->src.l3num = orig->src.l3num;
285 if (l3proto->invert_tuple(inverse, orig) == 0) 285 if (l3proto->invert_tuple(inverse, orig) == 0)
286 return false; 286 return false;
287 287
288 inverse->dst.dir = !orig->dst.dir; 288 inverse->dst.dir = !orig->dst.dir;
289 289
290 inverse->dst.protonum = orig->dst.protonum; 290 inverse->dst.protonum = orig->dst.protonum;
291 return l4proto->invert_tuple(inverse, orig); 291 return l4proto->invert_tuple(inverse, orig);
292 } 292 }
293 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); 293 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
294 294
295 static void 295 static void
296 clean_from_lists(struct nf_conn *ct) 296 clean_from_lists(struct nf_conn *ct)
297 { 297 {
298 pr_debug("clean_from_lists(%p)\n", ct); 298 pr_debug("clean_from_lists(%p)\n", ct);
299 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 299 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
300 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); 300 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
301 301
302 /* Destroy all pending expectations */ 302 /* Destroy all pending expectations */
303 nf_ct_remove_expectations(ct); 303 nf_ct_remove_expectations(ct);
304 } 304 }
305 305
306 /* must be called with local_bh_disable */ 306 /* must be called with local_bh_disable */
307 static void nf_ct_add_to_dying_list(struct nf_conn *ct) 307 static void nf_ct_add_to_dying_list(struct nf_conn *ct)
308 { 308 {
309 struct ct_pcpu *pcpu; 309 struct ct_pcpu *pcpu;
310 310
311 /* add this conntrack to the (per cpu) dying list */ 311 /* add this conntrack to the (per cpu) dying list */
312 ct->cpu = smp_processor_id(); 312 ct->cpu = smp_processor_id();
313 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); 313 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
314 314
315 spin_lock(&pcpu->lock); 315 spin_lock(&pcpu->lock);
316 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 316 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
317 &pcpu->dying); 317 &pcpu->dying);
318 spin_unlock(&pcpu->lock); 318 spin_unlock(&pcpu->lock);
319 } 319 }
320 320
321 /* must be called with local_bh_disable */ 321 /* must be called with local_bh_disable */
322 static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct) 322 static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
323 { 323 {
324 struct ct_pcpu *pcpu; 324 struct ct_pcpu *pcpu;
325 325
326 /* add this conntrack to the (per cpu) unconfirmed list */ 326 /* add this conntrack to the (per cpu) unconfirmed list */
327 ct->cpu = smp_processor_id(); 327 ct->cpu = smp_processor_id();
328 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); 328 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
329 329
330 spin_lock(&pcpu->lock); 330 spin_lock(&pcpu->lock);
331 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 331 hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
332 &pcpu->unconfirmed); 332 &pcpu->unconfirmed);
333 spin_unlock(&pcpu->lock); 333 spin_unlock(&pcpu->lock);
334 } 334 }
335 335
336 /* must be called with local_bh_disable */ 336 /* must be called with local_bh_disable */
337 static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) 337 static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
338 { 338 {
339 struct ct_pcpu *pcpu; 339 struct ct_pcpu *pcpu;
340 340
341 /* We overload first tuple to link into unconfirmed or dying list.*/ 341 /* We overload first tuple to link into unconfirmed or dying list.*/
342 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu); 342 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
343 343
344 spin_lock(&pcpu->lock); 344 spin_lock(&pcpu->lock);
345 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); 345 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
346 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); 346 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
347 spin_unlock(&pcpu->lock); 347 spin_unlock(&pcpu->lock);
348 } 348 }
349 349
350 /* Released via destroy_conntrack() */ 350 /* Released via destroy_conntrack() */
351 struct nf_conn *nf_ct_tmpl_alloc(struct net *net, 351 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
352 const struct nf_conntrack_zone *zone, 352 const struct nf_conntrack_zone *zone,
353 gfp_t flags) 353 gfp_t flags)
354 { 354 {
355 struct nf_conn *tmpl; 355 struct nf_conn *tmpl;
356 356
357 tmpl = kzalloc(sizeof(*tmpl), flags); 357 tmpl = kzalloc(sizeof(*tmpl), flags);
358 if (tmpl == NULL) 358 if (tmpl == NULL)
359 return NULL; 359 return NULL;
360 360
361 tmpl->status = IPS_TEMPLATE; 361 tmpl->status = IPS_TEMPLATE;
362 write_pnet(&tmpl->ct_net, net); 362 write_pnet(&tmpl->ct_net, net);
363 nf_ct_zone_add(tmpl, zone); 363 nf_ct_zone_add(tmpl, zone);
364 atomic_set(&tmpl->ct_general.use, 0); 364 atomic_set(&tmpl->ct_general.use, 0);
365 365
366 return tmpl; 366 return tmpl;
367 } 367 }
368 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); 368 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
369 369
370 void nf_ct_tmpl_free(struct nf_conn *tmpl) 370 void nf_ct_tmpl_free(struct nf_conn *tmpl)
371 { 371 {
372 nf_ct_ext_destroy(tmpl); 372 nf_ct_ext_destroy(tmpl);
373 nf_ct_ext_free(tmpl); 373 nf_ct_ext_free(tmpl);
374 kfree(tmpl); 374 kfree(tmpl);
375 } 375 }
376 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); 376 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
377 377
378 static void 378 static void
379 destroy_conntrack(struct nf_conntrack *nfct) 379 destroy_conntrack(struct nf_conntrack *nfct)
380 { 380 {
381 struct nf_conn *ct = (struct nf_conn *)nfct; 381 struct nf_conn *ct = (struct nf_conn *)nfct;
382 struct nf_conntrack_l4proto *l4proto; 382 struct nf_conntrack_l4proto *l4proto;
383 383
384 pr_debug("destroy_conntrack(%p)\n", ct); 384 pr_debug("destroy_conntrack(%p)\n", ct);
385 NF_CT_ASSERT(atomic_read(&nfct->use) == 0); 385 NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
386 386
387 if (unlikely(nf_ct_is_template(ct))) { 387 if (unlikely(nf_ct_is_template(ct))) {
388 nf_ct_tmpl_free(ct); 388 nf_ct_tmpl_free(ct);
389 return; 389 return;
390 } 390 }
391 rcu_read_lock(); 391 rcu_read_lock();
392 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 392 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
393 if (l4proto->destroy) 393 if (l4proto->destroy)
394 l4proto->destroy(ct); 394 l4proto->destroy(ct);
395 395
396 rcu_read_unlock(); 396 rcu_read_unlock();
397 397
398 local_bh_disable(); 398 local_bh_disable();
399 /* Expectations will have been removed in clean_from_lists, 399 /* Expectations will have been removed in clean_from_lists,
400 * except TFTP can create an expectation on the first packet, 400 * except TFTP can create an expectation on the first packet,
401 * before connection is in the list, so we need to clean here, 401 * before connection is in the list, so we need to clean here,
402 * too. 402 * too.
403 */ 403 */
404 nf_ct_remove_expectations(ct); 404 nf_ct_remove_expectations(ct);
405 405
406 nf_ct_del_from_dying_or_unconfirmed_list(ct); 406 nf_ct_del_from_dying_or_unconfirmed_list(ct);
407 407
408 local_bh_enable(); 408 local_bh_enable();
409 409
410 if (ct->master) 410 if (ct->master)
411 nf_ct_put(ct->master); 411 nf_ct_put(ct->master);
412 412
413 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); 413 pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
414 nf_conntrack_free(ct); 414 nf_conntrack_free(ct);
415 } 415 }
416 416
417 static void nf_ct_delete_from_lists(struct nf_conn *ct) 417 static void nf_ct_delete_from_lists(struct nf_conn *ct)
418 { 418 {
419 struct net *net = nf_ct_net(ct); 419 struct net *net = nf_ct_net(ct);
420 unsigned int hash, reply_hash; 420 unsigned int hash, reply_hash;
421 unsigned int sequence; 421 unsigned int sequence;
422 422
423 nf_ct_helper_destroy(ct); 423 nf_ct_helper_destroy(ct);
424 424
425 local_bh_disable(); 425 local_bh_disable();
426 do { 426 do {
427 sequence = read_seqcount_begin(&nf_conntrack_generation); 427 sequence = read_seqcount_begin(&nf_conntrack_generation);
428 hash = hash_conntrack(net, 428 hash = hash_conntrack(net,
429 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 429 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
430 reply_hash = hash_conntrack(net, 430 reply_hash = hash_conntrack(net,
431 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 431 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
432 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 432 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
433 433
434 clean_from_lists(ct); 434 clean_from_lists(ct);
435 nf_conntrack_double_unlock(hash, reply_hash); 435 nf_conntrack_double_unlock(hash, reply_hash);
436 436
437 nf_ct_add_to_dying_list(ct); 437 nf_ct_add_to_dying_list(ct);
438 438
439 local_bh_enable(); 439 local_bh_enable();
440 } 440 }
441 441
442 bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) 442 bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
443 { 443 {
444 struct nf_conn_tstamp *tstamp; 444 struct nf_conn_tstamp *tstamp;
445 445
446 if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) 446 if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
447 return false; 447 return false;
448 448
449 tstamp = nf_conn_tstamp_find(ct); 449 tstamp = nf_conn_tstamp_find(ct);
450 if (tstamp && tstamp->stop == 0) 450 if (tstamp && tstamp->stop == 0)
451 tstamp->stop = ktime_get_real_ns(); 451 tstamp->stop = ktime_get_real_ns();
452 452
453 if (nf_conntrack_event_report(IPCT_DESTROY, ct, 453 if (nf_conntrack_event_report(IPCT_DESTROY, ct,
454 portid, report) < 0) { 454 portid, report) < 0) {
455 /* destroy event was not delivered. nf_ct_put will 455 /* destroy event was not delivered. nf_ct_put will
456 * be done by event cache worker on redelivery. 456 * be done by event cache worker on redelivery.
457 */ 457 */
458 nf_ct_delete_from_lists(ct); 458 nf_ct_delete_from_lists(ct);
459 nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); 459 nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
460 return false; 460 return false;
461 } 461 }
462 462
463 nf_conntrack_ecache_work(nf_ct_net(ct)); 463 nf_conntrack_ecache_work(nf_ct_net(ct));
464 nf_ct_delete_from_lists(ct); 464 nf_ct_delete_from_lists(ct);
465 nf_ct_put(ct); 465 nf_ct_put(ct);
466 return true; 466 return true;
467 } 467 }
468 EXPORT_SYMBOL_GPL(nf_ct_delete); 468 EXPORT_SYMBOL_GPL(nf_ct_delete);
469 469
470 static inline bool 470 static inline bool
471 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, 471 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
472 const struct nf_conntrack_tuple *tuple, 472 const struct nf_conntrack_tuple *tuple,
473 const struct nf_conntrack_zone *zone, 473 const struct nf_conntrack_zone *zone,
474 const struct net *net) 474 const struct net *net)
475 { 475 {
476 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 476 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
477 477
478 /* A conntrack can be recreated with the equal tuple, 478 /* A conntrack can be recreated with the equal tuple,
479 * so we need to check that the conntrack is confirmed 479 * so we need to check that the conntrack is confirmed
480 */ 480 */
481 return nf_ct_tuple_equal(tuple, &h->tuple) && 481 return nf_ct_tuple_equal(tuple, &h->tuple) &&
482 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && 482 nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
483 nf_ct_is_confirmed(ct) && 483 nf_ct_is_confirmed(ct) &&
484 net_eq(net, nf_ct_net(ct)); 484 net_eq(net, nf_ct_net(ct));
485 } 485 }
486 486
487 /* caller must hold rcu readlock and none of the nf_conntrack_locks */ 487 /* caller must hold rcu readlock and none of the nf_conntrack_locks */
488 static void nf_ct_gc_expired(struct nf_conn *ct) 488 static void nf_ct_gc_expired(struct nf_conn *ct)
489 { 489 {
490 if (!atomic_inc_not_zero(&ct->ct_general.use)) 490 if (!atomic_inc_not_zero(&ct->ct_general.use))
491 return; 491 return;
492 492
493 if (nf_ct_should_gc(ct)) 493 if (nf_ct_should_gc(ct))
494 nf_ct_kill(ct); 494 nf_ct_kill(ct);
495 495
496 nf_ct_put(ct); 496 nf_ct_put(ct);
497 } 497 }
498 498
499 /* 499 /*
500 * Warning : 500 * Warning :
501 * - Caller must take a reference on returned object 501 * - Caller must take a reference on returned object
502 * and recheck nf_ct_tuple_equal(tuple, &h->tuple) 502 * and recheck nf_ct_tuple_equal(tuple, &h->tuple)
503 */ 503 */
504 static struct nf_conntrack_tuple_hash * 504 static struct nf_conntrack_tuple_hash *
505 ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, 505 ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
506 const struct nf_conntrack_tuple *tuple, u32 hash) 506 const struct nf_conntrack_tuple *tuple, u32 hash)
507 { 507 {
508 struct nf_conntrack_tuple_hash *h; 508 struct nf_conntrack_tuple_hash *h;
509 struct hlist_nulls_head *ct_hash; 509 struct hlist_nulls_head *ct_hash;
510 struct hlist_nulls_node *n; 510 struct hlist_nulls_node *n;
511 unsigned int bucket, hsize; 511 unsigned int bucket, hsize;
512 512
513 begin: 513 begin:
514 nf_conntrack_get_ht(&ct_hash, &hsize); 514 nf_conntrack_get_ht(&ct_hash, &hsize);
515 bucket = reciprocal_scale(hash, hsize); 515 bucket = reciprocal_scale(hash, hsize);
516 516
517 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { 517 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
518 struct nf_conn *ct; 518 struct nf_conn *ct;
519 519
520 ct = nf_ct_tuplehash_to_ctrack(h); 520 ct = nf_ct_tuplehash_to_ctrack(h);
521 if (nf_ct_is_expired(ct)) { 521 if (nf_ct_is_expired(ct)) {
522 nf_ct_gc_expired(ct); 522 nf_ct_gc_expired(ct);
523 continue; 523 continue;
524 } 524 }
525 525
526 if (nf_ct_is_dying(ct)) 526 if (nf_ct_is_dying(ct))
527 continue; 527 continue;
528 528
529 if (nf_ct_key_equal(h, tuple, zone, net)) 529 if (nf_ct_key_equal(h, tuple, zone, net))
530 return h; 530 return h;
531 } 531 }
532 /* 532 /*
533 * if the nulls value we got at the end of this lookup is 533 * if the nulls value we got at the end of this lookup is
534 * not the expected one, we must restart lookup. 534 * not the expected one, we must restart lookup.
535 * We probably met an item that was moved to another chain. 535 * We probably met an item that was moved to another chain.
536 */ 536 */
537 if (get_nulls_value(n) != bucket) { 537 if (get_nulls_value(n) != bucket) {
538 NF_CT_STAT_INC_ATOMIC(net, search_restart); 538 NF_CT_STAT_INC_ATOMIC(net, search_restart);
539 goto begin; 539 goto begin;
540 } 540 }
541 541
542 return NULL; 542 return NULL;
543 } 543 }
544 544
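The restart above relies on the "nulls" list convention: the end-of-chain marker is not a plain NULL but a tagged value that encodes the bucket the chain belongs to, so a lockless RCU walk can notice that it drifted onto another chain (possible because conntrack objects are reused under SLAB_DESTROY_BY_RCU) and start over. A minimal user-space sketch of that encoding, using stand-in helpers rather than the kernel's hlist_nulls API:

#include <assert.h>
#include <stdint.h>

/* Stand-ins for the hlist_nulls helpers: the low bit marks a "nulls"
 * terminator and the remaining bits carry a value, here the bucket
 * index the chain was started from. */
static inline uintptr_t make_nulls(unsigned int bucket) { return ((uintptr_t)bucket << 1) | 1; }
static inline int is_a_nulls(uintptr_t p) { return (int)(p & 1); }
static inline unsigned int nulls_value(uintptr_t p) { return (unsigned int)(p >> 1); }

int main(void)
{
	unsigned int bucket = 42;
	uintptr_t end = make_nulls(bucket);

	/* ____nf_conntrack_find() compares the terminator it reached with
	 * the bucket it started in; a mismatch means "restart the lookup". */
	assert(is_a_nulls(end) && nulls_value(end) == bucket);
	return 0;
}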
545 /* Find a connection corresponding to a tuple. */ 545 /* Find a connection corresponding to a tuple. */
546 static struct nf_conntrack_tuple_hash * 546 static struct nf_conntrack_tuple_hash *
547 __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, 547 __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
548 const struct nf_conntrack_tuple *tuple, u32 hash) 548 const struct nf_conntrack_tuple *tuple, u32 hash)
549 { 549 {
550 struct nf_conntrack_tuple_hash *h; 550 struct nf_conntrack_tuple_hash *h;
551 struct nf_conn *ct; 551 struct nf_conn *ct;
552 552
553 rcu_read_lock(); 553 rcu_read_lock();
554 begin: 554 begin:
555 h = ____nf_conntrack_find(net, zone, tuple, hash); 555 h = ____nf_conntrack_find(net, zone, tuple, hash);
556 if (h) { 556 if (h) {
557 ct = nf_ct_tuplehash_to_ctrack(h); 557 ct = nf_ct_tuplehash_to_ctrack(h);
558 if (unlikely(nf_ct_is_dying(ct) || 558 if (unlikely(nf_ct_is_dying(ct) ||
559 !atomic_inc_not_zero(&ct->ct_general.use))) 559 !atomic_inc_not_zero(&ct->ct_general.use)))
560 h = NULL; 560 h = NULL;
561 else { 561 else {
562 if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) { 562 if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
563 nf_ct_put(ct); 563 nf_ct_put(ct);
564 goto begin; 564 goto begin;
565 } 565 }
566 } 566 }
567 } 567 }
568 rcu_read_unlock(); 568 rcu_read_unlock();
569 569
570 return h; 570 return h;
571 } 571 }
572 572
573 struct nf_conntrack_tuple_hash * 573 struct nf_conntrack_tuple_hash *
574 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, 574 nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
575 const struct nf_conntrack_tuple *tuple) 575 const struct nf_conntrack_tuple *tuple)
576 { 576 {
577 return __nf_conntrack_find_get(net, zone, tuple, 577 return __nf_conntrack_find_get(net, zone, tuple,
578 hash_conntrack_raw(tuple, net)); 578 hash_conntrack_raw(tuple, net));
579 } 579 }
580 EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 580 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
581 581
582 static void __nf_conntrack_hash_insert(struct nf_conn *ct, 582 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
583 unsigned int hash, 583 unsigned int hash,
584 unsigned int reply_hash) 584 unsigned int reply_hash)
585 { 585 {
586 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 586 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
587 &nf_conntrack_hash[hash]); 587 &nf_conntrack_hash[hash]);
588 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 588 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
589 &nf_conntrack_hash[reply_hash]); 589 &nf_conntrack_hash[reply_hash]);
590 } 590 }
591 591
592 int 592 int
593 nf_conntrack_hash_check_insert(struct nf_conn *ct) 593 nf_conntrack_hash_check_insert(struct nf_conn *ct)
594 { 594 {
595 const struct nf_conntrack_zone *zone; 595 const struct nf_conntrack_zone *zone;
596 struct net *net = nf_ct_net(ct); 596 struct net *net = nf_ct_net(ct);
597 unsigned int hash, reply_hash; 597 unsigned int hash, reply_hash;
598 struct nf_conntrack_tuple_hash *h; 598 struct nf_conntrack_tuple_hash *h;
599 struct hlist_nulls_node *n; 599 struct hlist_nulls_node *n;
600 unsigned int sequence; 600 unsigned int sequence;
601 601
602 zone = nf_ct_zone(ct); 602 zone = nf_ct_zone(ct);
603 603
604 local_bh_disable(); 604 local_bh_disable();
605 do { 605 do {
606 sequence = read_seqcount_begin(&nf_conntrack_generation); 606 sequence = read_seqcount_begin(&nf_conntrack_generation);
607 hash = hash_conntrack(net, 607 hash = hash_conntrack(net,
608 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 608 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
609 reply_hash = hash_conntrack(net, 609 reply_hash = hash_conntrack(net,
610 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 610 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
611 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 611 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
612 612
613 /* See if there's one in the list already, including reverse */ 613 /* See if there's one in the list already, including reverse */
614 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) 614 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
615 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 615 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
616 zone, net)) 616 zone, net))
617 goto out; 617 goto out;
618 618
619 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) 619 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
620 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 620 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
621 zone, net)) 621 zone, net))
622 goto out; 622 goto out;
623 623
624 smp_wmb(); 624 smp_wmb();
625 /* The caller holds a reference to this object */ 625 /* The caller holds a reference to this object */
626 atomic_set(&ct->ct_general.use, 2); 626 atomic_set(&ct->ct_general.use, 2);
627 __nf_conntrack_hash_insert(ct, hash, reply_hash); 627 __nf_conntrack_hash_insert(ct, hash, reply_hash);
628 nf_conntrack_double_unlock(hash, reply_hash); 628 nf_conntrack_double_unlock(hash, reply_hash);
629 NF_CT_STAT_INC(net, insert); 629 NF_CT_STAT_INC(net, insert);
630 local_bh_enable(); 630 local_bh_enable();
631 return 0; 631 return 0;
632 632
633 out: 633 out:
634 nf_conntrack_double_unlock(hash, reply_hash); 634 nf_conntrack_double_unlock(hash, reply_hash);
635 NF_CT_STAT_INC(net, insert_failed); 635 NF_CT_STAT_INC(net, insert_failed);
636 local_bh_enable(); 636 local_bh_enable();
637 return -EEXIST; 637 return -EEXIST;
638 } 638 }
639 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); 639 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
640 640
641 static inline void nf_ct_acct_update(struct nf_conn *ct, 641 static inline void nf_ct_acct_update(struct nf_conn *ct,
642 enum ip_conntrack_info ctinfo, 642 enum ip_conntrack_info ctinfo,
643 unsigned int len) 643 unsigned int len)
644 { 644 {
645 struct nf_conn_acct *acct; 645 struct nf_conn_acct *acct;
646 646
647 acct = nf_conn_acct_find(ct); 647 acct = nf_conn_acct_find(ct);
648 if (acct) { 648 if (acct) {
649 struct nf_conn_counter *counter = acct->counter; 649 struct nf_conn_counter *counter = acct->counter;
650 650
651 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets); 651 atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
652 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes); 652 atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
653 } 653 }
654 } 654 }
655 655
656 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, 656 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
657 const struct nf_conn *loser_ct) 657 const struct nf_conn *loser_ct)
658 { 658 {
659 struct nf_conn_acct *acct; 659 struct nf_conn_acct *acct;
660 660
661 acct = nf_conn_acct_find(loser_ct); 661 acct = nf_conn_acct_find(loser_ct);
662 if (acct) { 662 if (acct) {
663 struct nf_conn_counter *counter = acct->counter; 663 struct nf_conn_counter *counter = acct->counter;
664 unsigned int bytes; 664 unsigned int bytes;
665 665
666 /* u32 should be fine since we must have seen one packet. */ 666 /* u32 should be fine since we must have seen one packet. */
667 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); 667 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
668 nf_ct_acct_update(ct, ctinfo, bytes); 668 nf_ct_acct_update(ct, ctinfo, bytes);
669 } 669 }
670 } 670 }
671 671
672 /* Resolve race on insertion if this protocol allows this. */ 672 /* Resolve race on insertion if this protocol allows this. */
673 static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, 673 static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
674 enum ip_conntrack_info ctinfo, 674 enum ip_conntrack_info ctinfo,
675 struct nf_conntrack_tuple_hash *h) 675 struct nf_conntrack_tuple_hash *h)
676 { 676 {
677 /* This is the conntrack entry already in hashes that won race. */ 677 /* This is the conntrack entry already in hashes that won race. */
678 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 678 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
679 struct nf_conntrack_l4proto *l4proto; 679 struct nf_conntrack_l4proto *l4proto;
680 680
681 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 681 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
682 if (l4proto->allow_clash && 682 if (l4proto->allow_clash &&
683 !nfct_nat(ct) && 683 !nfct_nat(ct) &&
684 !nf_ct_is_dying(ct) && 684 !nf_ct_is_dying(ct) &&
685 atomic_inc_not_zero(&ct->ct_general.use)) { 685 atomic_inc_not_zero(&ct->ct_general.use)) {
686 nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct); 686 nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
687 nf_conntrack_put(skb->nfct); 687 nf_conntrack_put(skb->nfct);
688 /* Assign conntrack already in hashes to this skbuff. Don't 688 /* Assign conntrack already in hashes to this skbuff. Don't
689 * modify skb->nfctinfo to ensure consistent stateful filtering. 689 * modify skb->nfctinfo to ensure consistent stateful filtering.
690 */ 690 */
691 skb->nfct = &ct->ct_general; 691 skb->nfct = &ct->ct_general;
692 return NF_ACCEPT; 692 return NF_ACCEPT;
693 } 693 }
694 NF_CT_STAT_INC(net, drop); 694 NF_CT_STAT_INC(net, drop);
695 return NF_DROP; 695 return NF_DROP;
696 } 696 }
697 697
698 /* Confirm a connection given skb; places it in hash table */ 698 /* Confirm a connection given skb; places it in hash table */
699 int 699 int
700 __nf_conntrack_confirm(struct sk_buff *skb) 700 __nf_conntrack_confirm(struct sk_buff *skb)
701 { 701 {
702 const struct nf_conntrack_zone *zone; 702 const struct nf_conntrack_zone *zone;
703 unsigned int hash, reply_hash; 703 unsigned int hash, reply_hash;
704 struct nf_conntrack_tuple_hash *h; 704 struct nf_conntrack_tuple_hash *h;
705 struct nf_conn *ct; 705 struct nf_conn *ct;
706 struct nf_conn_help *help; 706 struct nf_conn_help *help;
707 struct nf_conn_tstamp *tstamp; 707 struct nf_conn_tstamp *tstamp;
708 struct hlist_nulls_node *n; 708 struct hlist_nulls_node *n;
709 enum ip_conntrack_info ctinfo; 709 enum ip_conntrack_info ctinfo;
710 struct net *net; 710 struct net *net;
711 unsigned int sequence; 711 unsigned int sequence;
712 int ret = NF_DROP; 712 int ret = NF_DROP;
713 713
714 ct = nf_ct_get(skb, &ctinfo); 714 ct = nf_ct_get(skb, &ctinfo);
715 net = nf_ct_net(ct); 715 net = nf_ct_net(ct);
716 716
717 /* ipt_REJECT uses nf_conntrack_attach to attach related 717 /* ipt_REJECT uses nf_conntrack_attach to attach related
718 ICMP/TCP RST packets in the other direction. The actual packet 718 ICMP/TCP RST packets in the other direction. The actual packet
719 which created the connection will be IP_CT_NEW or, for an 719 which created the connection will be IP_CT_NEW or, for an
720 expected connection, IP_CT_RELATED. */ 720 expected connection, IP_CT_RELATED. */
721 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 721 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
722 return NF_ACCEPT; 722 return NF_ACCEPT;
723 723
724 zone = nf_ct_zone(ct); 724 zone = nf_ct_zone(ct);
725 local_bh_disable(); 725 local_bh_disable();
726 726
727 do { 727 do {
728 sequence = read_seqcount_begin(&nf_conntrack_generation); 728 sequence = read_seqcount_begin(&nf_conntrack_generation);
729 /* reuse the hash saved before */ 729 /* reuse the hash saved before */
730 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; 730 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
731 hash = scale_hash(hash); 731 hash = scale_hash(hash);
732 reply_hash = hash_conntrack(net, 732 reply_hash = hash_conntrack(net,
733 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 733 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
734 734
735 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 735 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
736 736
737 /* We're not in hash table, and we refuse to set up related 737 /* We're not in hash table, and we refuse to set up related
738 * connections for unconfirmed conns. But packet copies and 738 * connections for unconfirmed conns. But packet copies and
739 * REJECT will give spurious warnings here. 739 * REJECT will give spurious warnings here.
740 */ 740 */
741 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ 741 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
742 742
743 /* No external references means no one else could have 743 /* No external references means no one else could have
744 * confirmed us. 744 * confirmed us.
745 */ 745 */
746 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 746 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
747 pr_debug("Confirming conntrack %p\n", ct); 747 pr_debug("Confirming conntrack %p\n", ct);
748 /* We have to check the DYING flag after unlink to prevent 748 /* We have to check the DYING flag after unlink to prevent
749 * a race against nf_ct_get_next_corpse() possibly called from 749 * a race against nf_ct_get_next_corpse() possibly called from
750 * user context, else we insert an already 'dead' hash, blocking 750 * user context, else we insert an already 'dead' hash, blocking
751 * further use of that particular connection -JM. 751 * further use of that particular connection -JM.
752 */ 752 */
753 nf_ct_del_from_dying_or_unconfirmed_list(ct); 753 nf_ct_del_from_dying_or_unconfirmed_list(ct);
754 754
755 if (unlikely(nf_ct_is_dying(ct))) { 755 if (unlikely(nf_ct_is_dying(ct))) {
756 nf_ct_add_to_dying_list(ct); 756 nf_ct_add_to_dying_list(ct);
757 goto dying; 757 goto dying;
758 } 758 }
759 759
760 /* See if there's one in the list already, including reverse: 760 /* See if there's one in the list already, including reverse:
761 NAT could have grabbed it without realizing, since we're 761 NAT could have grabbed it without realizing, since we're
762 not in the hash. If there is, we lost the race. */ 762 not in the hash. If there is, we lost the race. */
763 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) 763 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
764 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 764 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
765 zone, net)) 765 zone, net))
766 goto out; 766 goto out;
767 767
768 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) 768 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
769 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 769 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
770 zone, net)) 770 zone, net))
771 goto out; 771 goto out;
772 772
773 /* Timer relative to confirmation time, not original 773 /* Timer relative to confirmation time, not original
774 setting time, otherwise we'd get timer wrap in 774 setting time, otherwise we'd get timer wrap in
775 weird delay cases. */ 775 weird delay cases. */
776 ct->timeout += nfct_time_stamp; 776 ct->timeout += nfct_time_stamp;
777 atomic_inc(&ct->ct_general.use); 777 atomic_inc(&ct->ct_general.use);
778 ct->status |= IPS_CONFIRMED; 778 ct->status |= IPS_CONFIRMED;
779 779
780 /* set conntrack timestamp, if enabled. */ 780 /* set conntrack timestamp, if enabled. */
781 tstamp = nf_conn_tstamp_find(ct); 781 tstamp = nf_conn_tstamp_find(ct);
782 if (tstamp) { 782 if (tstamp) {
783 if (skb->tstamp.tv64 == 0) 783 if (skb->tstamp.tv64 == 0)
784 __net_timestamp(skb); 784 __net_timestamp(skb);
785 785
786 tstamp->start = ktime_to_ns(skb->tstamp); 786 tstamp->start = ktime_to_ns(skb->tstamp);
787 } 787 }
788 /* Since the lookup is lockless, hash insertion must be done after 788 /* Since the lookup is lockless, hash insertion must be done after
789 * starting the timer and setting the CONFIRMED bit. The RCU barriers 789 * starting the timer and setting the CONFIRMED bit. The RCU barriers
790 * guarantee that no other CPU can find the conntrack before the above 790 * guarantee that no other CPU can find the conntrack before the above
791 * stores are visible. 791 * stores are visible.
792 */ 792 */
793 __nf_conntrack_hash_insert(ct, hash, reply_hash); 793 __nf_conntrack_hash_insert(ct, hash, reply_hash);
794 nf_conntrack_double_unlock(hash, reply_hash); 794 nf_conntrack_double_unlock(hash, reply_hash);
795 local_bh_enable(); 795 local_bh_enable();
796 796
797 help = nfct_help(ct); 797 help = nfct_help(ct);
798 if (help && help->helper) 798 if (help && help->helper)
799 nf_conntrack_event_cache(IPCT_HELPER, ct); 799 nf_conntrack_event_cache(IPCT_HELPER, ct);
800 800
801 nf_conntrack_event_cache(master_ct(ct) ? 801 nf_conntrack_event_cache(master_ct(ct) ?
802 IPCT_RELATED : IPCT_NEW, ct); 802 IPCT_RELATED : IPCT_NEW, ct);
803 return NF_ACCEPT; 803 return NF_ACCEPT;
804 804
805 out: 805 out:
806 nf_ct_add_to_dying_list(ct); 806 nf_ct_add_to_dying_list(ct);
807 ret = nf_ct_resolve_clash(net, skb, ctinfo, h); 807 ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
808 dying: 808 dying:
809 nf_conntrack_double_unlock(hash, reply_hash); 809 nf_conntrack_double_unlock(hash, reply_hash);
810 NF_CT_STAT_INC(net, insert_failed); 810 NF_CT_STAT_INC(net, insert_failed);
811 local_bh_enable(); 811 local_bh_enable();
812 return ret; 812 return ret;
813 } 813 }
814 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); 814 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
815 815
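One detail worth noting in __nf_conntrack_confirm() above: the "hash saved before" that is read back through the reply tuplehash's pprev pointer is stashed there by __nf_conntrack_alloc() later in this file. While the entry is still unhashed that pointer field is unused, so it doubles as scratch storage for the raw original-direction hash, which only has to be rescaled against the current table size at confirm time. A small user-space sketch of the idea, with a stand-in type instead of the kernel's hlist_nulls_node (illustrative only):

#include <assert.h>

/* Stand-in for the list node embedded in the reply tuplehash. */
struct node {
	struct node *next;
	struct node **pprev;
};

int main(void)
{
	struct node reply_node = { 0 };
	unsigned long hash = 0xdeadbeefUL;

	/* While the node is not linked into any chain, pprev is free
	 * storage; the raw hash is stashed there at allocation time ... */
	*(unsigned long *)&reply_node.pprev = hash;

	/* ... and read back (then rescaled to the live table size) at
	 * confirm time, before the node is actually inserted. */
	assert(*(unsigned long *)&reply_node.pprev == hash);
	return 0;
}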
816 /* Returns true if a connection corresponds to the tuple (required 816 /* Returns true if a connection corresponds to the tuple (required
817 for NAT). */ 817 for NAT). */
818 int 818 int
819 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, 819 nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
820 const struct nf_conn *ignored_conntrack) 820 const struct nf_conn *ignored_conntrack)
821 { 821 {
822 struct net *net = nf_ct_net(ignored_conntrack); 822 struct net *net = nf_ct_net(ignored_conntrack);
823 const struct nf_conntrack_zone *zone; 823 const struct nf_conntrack_zone *zone;
824 struct nf_conntrack_tuple_hash *h; 824 struct nf_conntrack_tuple_hash *h;
825 struct hlist_nulls_head *ct_hash; 825 struct hlist_nulls_head *ct_hash;
826 unsigned int hash, hsize; 826 unsigned int hash, hsize;
827 struct hlist_nulls_node *n; 827 struct hlist_nulls_node *n;
828 struct nf_conn *ct; 828 struct nf_conn *ct;
829 829
830 zone = nf_ct_zone(ignored_conntrack); 830 zone = nf_ct_zone(ignored_conntrack);
831 831
832 rcu_read_lock(); 832 rcu_read_lock();
833 begin: 833 begin:
834 nf_conntrack_get_ht(&ct_hash, &hsize); 834 nf_conntrack_get_ht(&ct_hash, &hsize);
835 hash = __hash_conntrack(net, tuple, hsize); 835 hash = __hash_conntrack(net, tuple, hsize);
836 836
837 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { 837 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
838 ct = nf_ct_tuplehash_to_ctrack(h); 838 ct = nf_ct_tuplehash_to_ctrack(h);
839 839
840 if (ct == ignored_conntrack) 840 if (ct == ignored_conntrack)
841 continue; 841 continue;
842 842
843 if (nf_ct_is_expired(ct)) { 843 if (nf_ct_is_expired(ct)) {
844 nf_ct_gc_expired(ct); 844 nf_ct_gc_expired(ct);
845 continue; 845 continue;
846 } 846 }
847 847
848 if (nf_ct_key_equal(h, tuple, zone, net)) { 848 if (nf_ct_key_equal(h, tuple, zone, net)) {
849 NF_CT_STAT_INC_ATOMIC(net, found); 849 NF_CT_STAT_INC_ATOMIC(net, found);
850 rcu_read_unlock(); 850 rcu_read_unlock();
851 return 1; 851 return 1;
852 } 852 }
853 } 853 }
854 854
855 if (get_nulls_value(n) != hash) { 855 if (get_nulls_value(n) != hash) {
856 NF_CT_STAT_INC_ATOMIC(net, search_restart); 856 NF_CT_STAT_INC_ATOMIC(net, search_restart);
857 goto begin; 857 goto begin;
858 } 858 }
859 859
860 rcu_read_unlock(); 860 rcu_read_unlock();
861 861
862 return 0; 862 return 0;
863 } 863 }
864 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); 864 EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
865 865
866 #define NF_CT_EVICTION_RANGE 8 866 #define NF_CT_EVICTION_RANGE 8
867 867
868 /* There's a small race here where we may free a just-assured 868 /* There's a small race here where we may free a just-assured
869 connection. Too bad: we're in trouble anyway. */ 869 connection. Too bad: we're in trouble anyway. */
870 static unsigned int early_drop_list(struct net *net, 870 static unsigned int early_drop_list(struct net *net,
871 struct hlist_nulls_head *head) 871 struct hlist_nulls_head *head)
872 { 872 {
873 struct nf_conntrack_tuple_hash *h; 873 struct nf_conntrack_tuple_hash *h;
874 struct hlist_nulls_node *n; 874 struct hlist_nulls_node *n;
875 unsigned int drops = 0; 875 unsigned int drops = 0;
876 struct nf_conn *tmp; 876 struct nf_conn *tmp;
877 877
878 hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { 878 hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
879 tmp = nf_ct_tuplehash_to_ctrack(h); 879 tmp = nf_ct_tuplehash_to_ctrack(h);
880 880
881 if (nf_ct_is_expired(tmp)) { 881 if (nf_ct_is_expired(tmp)) {
882 nf_ct_gc_expired(tmp); 882 nf_ct_gc_expired(tmp);
883 continue; 883 continue;
884 } 884 }
885 885
886 if (test_bit(IPS_ASSURED_BIT, &tmp->status) || 886 if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
887 !net_eq(nf_ct_net(tmp), net) || 887 !net_eq(nf_ct_net(tmp), net) ||
888 nf_ct_is_dying(tmp)) 888 nf_ct_is_dying(tmp))
889 continue; 889 continue;
890 890
891 if (!atomic_inc_not_zero(&tmp->ct_general.use)) 891 if (!atomic_inc_not_zero(&tmp->ct_general.use))
892 continue; 892 continue;
893 893
894 /* kill only if still in same netns -- might have moved due to 894 /* kill only if still in same netns -- might have moved due to
895 * SLAB_DESTROY_BY_RCU rules. 895 * SLAB_DESTROY_BY_RCU rules.
896 * 896 *
897 * We steal the timer reference. If that fails timer has 897 * We steal the timer reference. If that fails timer has
898 * already fired or someone else deleted it. Just drop ref 898 * already fired or someone else deleted it. Just drop ref
899 * and move to next entry. 899 * and move to next entry.
900 */ 900 */
901 if (net_eq(nf_ct_net(tmp), net) && 901 if (net_eq(nf_ct_net(tmp), net) &&
902 nf_ct_is_confirmed(tmp) && 902 nf_ct_is_confirmed(tmp) &&
903 nf_ct_delete(tmp, 0, 0)) 903 nf_ct_delete(tmp, 0, 0))
904 drops++; 904 drops++;
905 905
906 nf_ct_put(tmp); 906 nf_ct_put(tmp);
907 } 907 }
908 908
909 return drops; 909 return drops;
910 } 910 }
911 911
912 static noinline int early_drop(struct net *net, unsigned int _hash) 912 static noinline int early_drop(struct net *net, unsigned int _hash)
913 { 913 {
914 unsigned int i; 914 unsigned int i;
915 915
916 for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { 916 for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
917 struct hlist_nulls_head *ct_hash; 917 struct hlist_nulls_head *ct_hash;
918 unsigned int hash, hsize, drops; 918 unsigned int hash, hsize, drops;
919 919
920 rcu_read_lock(); 920 rcu_read_lock();
921 nf_conntrack_get_ht(&ct_hash, &hsize); 921 nf_conntrack_get_ht(&ct_hash, &hsize);
922 hash = reciprocal_scale(_hash++, hsize); 922 hash = reciprocal_scale(_hash++, hsize);
923 923
924 drops = early_drop_list(net, &ct_hash[hash]); 924 drops = early_drop_list(net, &ct_hash[hash]);
925 rcu_read_unlock(); 925 rcu_read_unlock();
926 926
927 if (drops) { 927 if (drops) {
928 NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); 928 NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
929 return true; 929 return true;
930 } 930 }
931 } 931 }
932 932
933 return false; 933 return false;
934 } 934 }
935 935
936 static void gc_worker(struct work_struct *work) 936 static void gc_worker(struct work_struct *work)
937 { 937 {
938 unsigned int i, goal, buckets = 0, expired_count = 0; 938 unsigned int i, goal, buckets = 0, expired_count = 0;
939 unsigned long next_run = GC_INTERVAL; 939 unsigned long next_run = GC_INTERVAL;
940 unsigned int ratio, scanned = 0; 940 unsigned int ratio, scanned = 0;
941 struct conntrack_gc_work *gc_work; 941 struct conntrack_gc_work *gc_work;
942 942
943 gc_work = container_of(work, struct conntrack_gc_work, dwork.work); 943 gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
944 944
945 goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); 945 goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
946 i = gc_work->last_bucket; 946 i = gc_work->last_bucket;
947 947
948 do { 948 do {
949 struct nf_conntrack_tuple_hash *h; 949 struct nf_conntrack_tuple_hash *h;
950 struct hlist_nulls_head *ct_hash; 950 struct hlist_nulls_head *ct_hash;
951 struct hlist_nulls_node *n; 951 struct hlist_nulls_node *n;
952 unsigned int hashsz; 952 unsigned int hashsz;
953 struct nf_conn *tmp; 953 struct nf_conn *tmp;
954 954
955 i++; 955 i++;
956 rcu_read_lock(); 956 rcu_read_lock();
957 957
958 nf_conntrack_get_ht(&ct_hash, &hashsz); 958 nf_conntrack_get_ht(&ct_hash, &hashsz);
959 if (i >= hashsz) 959 if (i >= hashsz)
960 i = 0; 960 i = 0;
961 961
962 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { 962 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
963 tmp = nf_ct_tuplehash_to_ctrack(h); 963 tmp = nf_ct_tuplehash_to_ctrack(h);
964 964
965 scanned++; 965 scanned++;
966 if (nf_ct_is_expired(tmp)) { 966 if (nf_ct_is_expired(tmp)) {
967 nf_ct_gc_expired(tmp); 967 nf_ct_gc_expired(tmp);
968 expired_count++; 968 expired_count++;
969 continue; 969 continue;
970 } 970 }
971 } 971 }
972 972
973 /* could check get_nulls_value() here and restart if ct 973 /* could check get_nulls_value() here and restart if ct
974 * was moved to another chain. But given gc is best-effort 974 * was moved to another chain. But given gc is best-effort
975 * we will just continue with next hash slot. 975 * we will just continue with next hash slot.
976 */ 976 */
977 rcu_read_unlock(); 977 rcu_read_unlock();
978 cond_resched_rcu_qs(); 978 cond_resched_rcu_qs();
979 } while (++buckets < goal && 979 } while (++buckets < goal &&
980 expired_count < GC_MAX_EVICTS); 980 expired_count < GC_MAX_EVICTS);
981 981
982 if (gc_work->exiting) 982 if (gc_work->exiting)
983 return; 983 return;
984 984
985 ratio = scanned ? expired_count * 100 / scanned : 0; 985 ratio = scanned ? expired_count * 100 / scanned : 0;
986 if (ratio >= 90 || expired_count == GC_MAX_EVICTS) 986 if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
987 next_run = 0; 987 next_run = 0;
988 988
989 gc_work->last_bucket = i; 989 gc_work->last_bucket = i;
990 schedule_delayed_work(&gc_work->dwork, next_run); 990 schedule_delayed_work(&gc_work->dwork, next_run);
991 } 991 }
992 992
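The rescheduling decision at the end of gc_worker() is driven by how productive the scan was: if at least 90% of the scanned entries were expired, or the per-run eviction budget (GC_MAX_EVICTS) was hit, the work re-arms itself immediately (next_run = 0) instead of waiting another GC_INTERVAL. A tiny worked example of that integer arithmetic, using made-up counts:

#include <assert.h>

int main(void)
{
	/* Hypothetical counts from one gc_worker() pass. */
	unsigned int scanned = 200, expired_count = 185;
	unsigned int ratio = scanned ? expired_count * 100 / scanned : 0;

	assert(ratio == 92);	/* 185 * 100 / 200, integer division */
	/* ratio >= 90, so next_run would be forced to 0 (run again at once). */
	return 0;
}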
993 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) 993 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
994 { 994 {
995 INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); 995 INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
996 gc_work->exiting = false; 996 gc_work->exiting = false;
997 } 997 }
998 998
999 static struct nf_conn * 999 static struct nf_conn *
1000 __nf_conntrack_alloc(struct net *net, 1000 __nf_conntrack_alloc(struct net *net,
1001 const struct nf_conntrack_zone *zone, 1001 const struct nf_conntrack_zone *zone,
1002 const struct nf_conntrack_tuple *orig, 1002 const struct nf_conntrack_tuple *orig,
1003 const struct nf_conntrack_tuple *repl, 1003 const struct nf_conntrack_tuple *repl,
1004 gfp_t gfp, u32 hash) 1004 gfp_t gfp, u32 hash)
1005 { 1005 {
1006 struct nf_conn *ct; 1006 struct nf_conn *ct;
1007 1007
1008 /* We don't want any race condition at early drop stage */ 1008 /* We don't want any race condition at early drop stage */
1009 atomic_inc(&net->ct.count); 1009 atomic_inc(&net->ct.count);
1010 1010
1011 if (nf_conntrack_max && 1011 if (nf_conntrack_max &&
1012 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 1012 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
1013 if (!early_drop(net, hash)) { 1013 if (!early_drop(net, hash)) {
1014 atomic_dec(&net->ct.count); 1014 atomic_dec(&net->ct.count);
1015 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n"); 1015 net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
1016 return ERR_PTR(-ENOMEM); 1016 return ERR_PTR(-ENOMEM);
1017 } 1017 }
1018 } 1018 }
1019 1019
1020 /* 1020 /*
1021 * Do not use kmem_cache_zalloc(), as this cache uses 1021 * Do not use kmem_cache_zalloc(), as this cache uses
1022 * SLAB_DESTROY_BY_RCU. 1022 * SLAB_DESTROY_BY_RCU.
1023 */ 1023 */
1024 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); 1024 ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
1025 if (ct == NULL) 1025 if (ct == NULL)
1026 goto out; 1026 goto out;
1027 1027
1028 spin_lock_init(&ct->lock); 1028 spin_lock_init(&ct->lock);
1029 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 1029 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
1030 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 1030 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
1031 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 1031 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
1032 /* save hash for reusing when confirming */ 1032 /* save hash for reusing when confirming */
1033 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; 1033 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
1034 ct->status = 0; 1034 ct->status = 0;
1035 write_pnet(&ct->ct_net, net); 1035 write_pnet(&ct->ct_net, net);
1036 memset(&ct->__nfct_init_offset[0], 0, 1036 memset(&ct->__nfct_init_offset[0], 0,
1037 offsetof(struct nf_conn, proto) - 1037 offsetof(struct nf_conn, proto) -
1038 offsetof(struct nf_conn, __nfct_init_offset[0])); 1038 offsetof(struct nf_conn, __nfct_init_offset[0]));
1039 1039
1040 nf_ct_zone_add(ct, zone); 1040 nf_ct_zone_add(ct, zone);
1041 1041
1042 /* Because we use RCU lookups, we set ct_general.use to zero before 1042 /* Because we use RCU lookups, we set ct_general.use to zero before
1043 * this is inserted in any list. 1043 * this is inserted in any list.
1044 */ 1044 */
1045 atomic_set(&ct->ct_general.use, 0); 1045 atomic_set(&ct->ct_general.use, 0);
1046 return ct; 1046 return ct;
1047 out: 1047 out:
1048 atomic_dec(&net->ct.count); 1048 atomic_dec(&net->ct.count);
1049 return ERR_PTR(-ENOMEM); 1049 return ERR_PTR(-ENOMEM);
1050 } 1050 }
1051 1051
1052 struct nf_conn *nf_conntrack_alloc(struct net *net, 1052 struct nf_conn *nf_conntrack_alloc(struct net *net,
1053 const struct nf_conntrack_zone *zone, 1053 const struct nf_conntrack_zone *zone,
1054 const struct nf_conntrack_tuple *orig, 1054 const struct nf_conntrack_tuple *orig,
1055 const struct nf_conntrack_tuple *repl, 1055 const struct nf_conntrack_tuple *repl,
1056 gfp_t gfp) 1056 gfp_t gfp)
1057 { 1057 {
1058 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0); 1058 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
1059 } 1059 }
1060 EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 1060 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
1061 1061
1062 void nf_conntrack_free(struct nf_conn *ct) 1062 void nf_conntrack_free(struct nf_conn *ct)
1063 { 1063 {
1064 struct net *net = nf_ct_net(ct); 1064 struct net *net = nf_ct_net(ct);
1065 1065
1066 /* A freed object has refcnt == 0, that's 1066 /* A freed object has refcnt == 0, that's
1067 * the golden rule for SLAB_DESTROY_BY_RCU 1067 * the golden rule for SLAB_DESTROY_BY_RCU
1068 */ 1068 */
1069 NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); 1069 NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
1070 1070
1071 nf_ct_ext_destroy(ct); 1071 nf_ct_ext_destroy(ct);
1072 nf_ct_ext_free(ct); 1072 nf_ct_ext_free(ct);
1073 kmem_cache_free(nf_conntrack_cachep, ct); 1073 kmem_cache_free(nf_conntrack_cachep, ct);
1074 smp_mb__before_atomic(); 1074 smp_mb__before_atomic();
1075 atomic_dec(&net->ct.count); 1075 atomic_dec(&net->ct.count);
1076 } 1076 }
1077 EXPORT_SYMBOL_GPL(nf_conntrack_free); 1077 EXPORT_SYMBOL_GPL(nf_conntrack_free);
1078 1078
1079 1079
1080 /* Allocate a new conntrack: we return -ENOMEM if classification 1080 /* Allocate a new conntrack: we return -ENOMEM if classification
1081 failed due to stress. Otherwise it really is unclassifiable. */ 1081 failed due to stress. Otherwise it really is unclassifiable. */
1082 static struct nf_conntrack_tuple_hash * 1082 static struct nf_conntrack_tuple_hash *
1083 init_conntrack(struct net *net, struct nf_conn *tmpl, 1083 init_conntrack(struct net *net, struct nf_conn *tmpl,
1084 const struct nf_conntrack_tuple *tuple, 1084 const struct nf_conntrack_tuple *tuple,
1085 struct nf_conntrack_l3proto *l3proto, 1085 struct nf_conntrack_l3proto *l3proto,
1086 struct nf_conntrack_l4proto *l4proto, 1086 struct nf_conntrack_l4proto *l4proto,
1087 struct sk_buff *skb, 1087 struct sk_buff *skb,
1088 unsigned int dataoff, u32 hash) 1088 unsigned int dataoff, u32 hash)
1089 { 1089 {
1090 struct nf_conn *ct; 1090 struct nf_conn *ct;
1091 struct nf_conn_help *help; 1091 struct nf_conn_help *help;
1092 struct nf_conntrack_tuple repl_tuple; 1092 struct nf_conntrack_tuple repl_tuple;
1093 struct nf_conntrack_ecache *ecache; 1093 struct nf_conntrack_ecache *ecache;
1094 struct nf_conntrack_expect *exp = NULL; 1094 struct nf_conntrack_expect *exp = NULL;
1095 const struct nf_conntrack_zone *zone; 1095 const struct nf_conntrack_zone *zone;
1096 struct nf_conn_timeout *timeout_ext; 1096 struct nf_conn_timeout *timeout_ext;
1097 struct nf_conntrack_zone tmp; 1097 struct nf_conntrack_zone tmp;
1098 unsigned int *timeouts; 1098 unsigned int *timeouts;
1099 1099
1100 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { 1100 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
1101 pr_debug("Can't invert tuple.\n"); 1101 pr_debug("Can't invert tuple.\n");
1102 return NULL; 1102 return NULL;
1103 } 1103 }
1104 1104
1105 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1105 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1106 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, 1106 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
1107 hash); 1107 hash);
1108 if (IS_ERR(ct)) 1108 if (IS_ERR(ct))
1109 return (struct nf_conntrack_tuple_hash *)ct; 1109 return (struct nf_conntrack_tuple_hash *)ct;
1110 1110
1111 if (!nf_ct_add_synproxy(ct, tmpl)) { 1111 if (!nf_ct_add_synproxy(ct, tmpl)) {
1112 nf_conntrack_free(ct); 1112 nf_conntrack_free(ct);
1113 return ERR_PTR(-ENOMEM); 1113 return ERR_PTR(-ENOMEM);
1114 } 1114 }
1115 1115
1116 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; 1116 timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
1117 if (timeout_ext) { 1117 if (timeout_ext) {
1118 timeouts = nf_ct_timeout_data(timeout_ext); 1118 timeouts = nf_ct_timeout_data(timeout_ext);
1119 if (unlikely(!timeouts)) 1119 if (unlikely(!timeouts))
1120 timeouts = l4proto->get_timeouts(net); 1120 timeouts = l4proto->get_timeouts(net);
1121 } else { 1121 } else {
1122 timeouts = l4proto->get_timeouts(net); 1122 timeouts = l4proto->get_timeouts(net);
1123 } 1123 }
1124 1124
1125 if (!l4proto->new(ct, skb, dataoff, timeouts)) { 1125 if (!l4proto->new(ct, skb, dataoff, timeouts)) {
1126 nf_conntrack_free(ct); 1126 nf_conntrack_free(ct);
1127 pr_debug("can't track with proto module\n"); 1127 pr_debug("can't track with proto module\n");
1128 return NULL; 1128 return NULL;
1129 } 1129 }
1130 1130
1131 if (timeout_ext) 1131 if (timeout_ext)
1132 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), 1132 nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
1133 GFP_ATOMIC); 1133 GFP_ATOMIC);
1134 1134
1135 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1135 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1136 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 1136 nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
1137 nf_ct_labels_ext_add(ct); 1137 nf_ct_labels_ext_add(ct);
1138 1138
1139 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 1139 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
1140 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, 1140 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
1141 ecache ? ecache->expmask : 0, 1141 ecache ? ecache->expmask : 0,
1142 GFP_ATOMIC); 1142 GFP_ATOMIC);
1143 1143
1144 local_bh_disable(); 1144 local_bh_disable();
1145 if (net->ct.expect_count) { 1145 if (net->ct.expect_count) {
1146 spin_lock(&nf_conntrack_expect_lock); 1146 spin_lock(&nf_conntrack_expect_lock);
1147 exp = nf_ct_find_expectation(net, zone, tuple); 1147 exp = nf_ct_find_expectation(net, zone, tuple);
1148 if (exp) { 1148 if (exp) {
1149 pr_debug("expectation arrives ct=%p exp=%p\n", 1149 pr_debug("expectation arrives ct=%p exp=%p\n",
1150 ct, exp); 1150 ct, exp);
1151 /* Welcome, Mr. Bond. We've been expecting you... */ 1151 /* Welcome, Mr. Bond. We've been expecting you... */
1152 __set_bit(IPS_EXPECTED_BIT, &ct->status); 1152 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1153 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ 1153 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */
1154 ct->master = exp->master; 1154 ct->master = exp->master;
1155 if (exp->helper) { 1155 if (exp->helper) {
1156 help = nf_ct_helper_ext_add(ct, exp->helper, 1156 help = nf_ct_helper_ext_add(ct, exp->helper,
1157 GFP_ATOMIC); 1157 GFP_ATOMIC);
1158 if (help) 1158 if (help)
1159 rcu_assign_pointer(help->helper, exp->helper); 1159 rcu_assign_pointer(help->helper, exp->helper);
1160 } 1160 }
1161 1161
1162 #ifdef CONFIG_NF_CONNTRACK_MARK 1162 #ifdef CONFIG_NF_CONNTRACK_MARK
1163 ct->mark = exp->master->mark; 1163 ct->mark = exp->master->mark;
1164 #endif 1164 #endif
1165 #ifdef CONFIG_NF_CONNTRACK_SECMARK 1165 #ifdef CONFIG_NF_CONNTRACK_SECMARK
1166 ct->secmark = exp->master->secmark; 1166 ct->secmark = exp->master->secmark;
1167 #endif 1167 #endif
1168 NF_CT_STAT_INC(net, expect_new); 1168 NF_CT_STAT_INC(net, expect_new);
1169 } 1169 }
1170 spin_unlock(&nf_conntrack_expect_lock); 1170 spin_unlock(&nf_conntrack_expect_lock);
1171 } 1171 }
1172 if (!exp) 1172 if (!exp)
1173 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); 1173 __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
1174 1174
1175 /* Now it is inserted into the unconfirmed list, bump refcount */ 1175 /* Now it is inserted into the unconfirmed list, bump refcount */
1176 nf_conntrack_get(&ct->ct_general); 1176 nf_conntrack_get(&ct->ct_general);
1177 nf_ct_add_to_unconfirmed_list(ct); 1177 nf_ct_add_to_unconfirmed_list(ct);
1178 1178
1179 local_bh_enable(); 1179 local_bh_enable();
1180 1180
1181 if (exp) { 1181 if (exp) {
1182 if (exp->expectfn) 1182 if (exp->expectfn)
1183 exp->expectfn(ct, exp); 1183 exp->expectfn(ct, exp);
1184 nf_ct_expect_put(exp); 1184 nf_ct_expect_put(exp);
1185 } 1185 }
1186 1186
1187 return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; 1187 return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
1188 } 1188 }
1189 1189
1190 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ 1190 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1191 static inline struct nf_conn * 1191 static inline struct nf_conn *
1192 resolve_normal_ct(struct net *net, struct nf_conn *tmpl, 1192 resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
1193 struct sk_buff *skb, 1193 struct sk_buff *skb,
1194 unsigned int dataoff, 1194 unsigned int dataoff,
1195 u_int16_t l3num, 1195 u_int16_t l3num,
1196 u_int8_t protonum, 1196 u_int8_t protonum,
1197 struct nf_conntrack_l3proto *l3proto, 1197 struct nf_conntrack_l3proto *l3proto,
1198 struct nf_conntrack_l4proto *l4proto, 1198 struct nf_conntrack_l4proto *l4proto,
1199 int *set_reply, 1199 int *set_reply,
1200 enum ip_conntrack_info *ctinfo) 1200 enum ip_conntrack_info *ctinfo)
1201 { 1201 {
1202 const struct nf_conntrack_zone *zone; 1202 const struct nf_conntrack_zone *zone;
1203 struct nf_conntrack_tuple tuple; 1203 struct nf_conntrack_tuple tuple;
1204 struct nf_conntrack_tuple_hash *h; 1204 struct nf_conntrack_tuple_hash *h;
1205 struct nf_conntrack_zone tmp; 1205 struct nf_conntrack_zone tmp;
1206 struct nf_conn *ct; 1206 struct nf_conn *ct;
1207 u32 hash; 1207 u32 hash;
1208 1208
1209 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 1209 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
1210 dataoff, l3num, protonum, net, &tuple, l3proto, 1210 dataoff, l3num, protonum, net, &tuple, l3proto,
1211 l4proto)) { 1211 l4proto)) {
1212 pr_debug("Can't get tuple\n"); 1212 pr_debug("Can't get tuple\n");
1213 return NULL; 1213 return NULL;
1214 } 1214 }
1215 1215
1216 /* look for tuple match */ 1216 /* look for tuple match */
1217 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1217 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
1218 hash = hash_conntrack_raw(&tuple, net); 1218 hash = hash_conntrack_raw(&tuple, net);
1219 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1219 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
1220 if (!h) { 1220 if (!h) {
1221 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 1221 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
1222 skb, dataoff, hash); 1222 skb, dataoff, hash);
1223 if (!h) 1223 if (!h)
1224 return NULL; 1224 return NULL;
1225 if (IS_ERR(h)) 1225 if (IS_ERR(h))
1226 return (void *)h; 1226 return (void *)h;
1227 } 1227 }
1228 ct = nf_ct_tuplehash_to_ctrack(h); 1228 ct = nf_ct_tuplehash_to_ctrack(h);
1229 1229
1230 /* It exists; we have (non-exclusive) reference. */ 1230 /* It exists; we have (non-exclusive) reference. */
1231 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 1231 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
1232 *ctinfo = IP_CT_ESTABLISHED_REPLY; 1232 *ctinfo = IP_CT_ESTABLISHED_REPLY;
1233 /* Please set reply bit if this packet OK */ 1233 /* Please set reply bit if this packet OK */
1234 *set_reply = 1; 1234 *set_reply = 1;
1235 } else { 1235 } else {
1236 /* Once we've had two way comms, always ESTABLISHED. */ 1236 /* Once we've had two way comms, always ESTABLISHED. */
1237 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 1237 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1238 pr_debug("normal packet for %p\n", ct); 1238 pr_debug("normal packet for %p\n", ct);
1239 *ctinfo = IP_CT_ESTABLISHED; 1239 *ctinfo = IP_CT_ESTABLISHED;
1240 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { 1240 } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
1241 pr_debug("related packet for %p\n", ct); 1241 pr_debug("related packet for %p\n", ct);
1242 *ctinfo = IP_CT_RELATED; 1242 *ctinfo = IP_CT_RELATED;
1243 } else { 1243 } else {
1244 pr_debug("new packet for %p\n", ct); 1244 pr_debug("new packet for %p\n", ct);
1245 *ctinfo = IP_CT_NEW; 1245 *ctinfo = IP_CT_NEW;
1246 } 1246 }
1247 *set_reply = 0; 1247 *set_reply = 0;
1248 } 1248 }
1249 skb->nfct = &ct->ct_general; 1249 skb->nfct = &ct->ct_general;
1250 skb->nfctinfo = *ctinfo; 1250 skb->nfctinfo = *ctinfo;
1251 return ct; 1251 return ct;
1252 } 1252 }
1253 1253
1254 unsigned int 1254 unsigned int
1255 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 1255 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
1256 struct sk_buff *skb) 1256 struct sk_buff *skb)
1257 { 1257 {
1258 struct nf_conn *ct, *tmpl = NULL; 1258 struct nf_conn *ct, *tmpl = NULL;
1259 enum ip_conntrack_info ctinfo; 1259 enum ip_conntrack_info ctinfo;
1260 struct nf_conntrack_l3proto *l3proto; 1260 struct nf_conntrack_l3proto *l3proto;
1261 struct nf_conntrack_l4proto *l4proto; 1261 struct nf_conntrack_l4proto *l4proto;
1262 unsigned int *timeouts; 1262 unsigned int *timeouts;
1263 unsigned int dataoff; 1263 unsigned int dataoff;
1264 u_int8_t protonum; 1264 u_int8_t protonum;
1265 int set_reply = 0; 1265 int set_reply = 0;
1266 int ret; 1266 int ret;
1267 1267
1268 if (skb->nfct) { 1268 if (skb->nfct) {
1269 /* Previously seen (loopback or untracked)? Ignore. */ 1269 /* Previously seen (loopback or untracked)? Ignore. */
1270 tmpl = (struct nf_conn *)skb->nfct; 1270 tmpl = (struct nf_conn *)skb->nfct;
1271 if (!nf_ct_is_template(tmpl)) { 1271 if (!nf_ct_is_template(tmpl)) {
1272 NF_CT_STAT_INC_ATOMIC(net, ignore); 1272 NF_CT_STAT_INC_ATOMIC(net, ignore);
1273 return NF_ACCEPT; 1273 return NF_ACCEPT;
1274 } 1274 }
1275 skb->nfct = NULL; 1275 skb->nfct = NULL;
1276 } 1276 }
1277 1277
1278 /* rcu_read_lock()ed by nf_hook_thresh */ 1278 /* rcu_read_lock()ed by nf_hook_thresh */
1279 l3proto = __nf_ct_l3proto_find(pf); 1279 l3proto = __nf_ct_l3proto_find(pf);
1280 ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 1280 ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
1281 &dataoff, &protonum); 1281 &dataoff, &protonum);
1282 if (ret <= 0) { 1282 if (ret <= 0) {
1283 pr_debug("not prepared to track yet or error occurred\n"); 1283 pr_debug("not prepared to track yet or error occurred\n");
1284 NF_CT_STAT_INC_ATOMIC(net, error); 1284 NF_CT_STAT_INC_ATOMIC(net, error);
1285 NF_CT_STAT_INC_ATOMIC(net, invalid); 1285 NF_CT_STAT_INC_ATOMIC(net, invalid);
1286 ret = -ret; 1286 ret = -ret;
1287 goto out; 1287 goto out;
1288 } 1288 }
1289 1289
1290 l4proto = __nf_ct_l4proto_find(pf, protonum); 1290 l4proto = __nf_ct_l4proto_find(pf, protonum);
1291 1291
1292 /* It may be a special packet, error, unclean... 1292 /* It may be a special packet, error, unclean...
1293 * inverse of the return code tells the netfilter 1293 * inverse of the return code tells the netfilter
1294 * core what to do with the packet. */ 1294 * core what to do with the packet. */
1295 if (l4proto->error != NULL) { 1295 if (l4proto->error != NULL) {
1296 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, 1296 ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
1297 pf, hooknum); 1297 pf, hooknum);
1298 if (ret <= 0) { 1298 if (ret <= 0) {
1299 NF_CT_STAT_INC_ATOMIC(net, error); 1299 NF_CT_STAT_INC_ATOMIC(net, error);
1300 NF_CT_STAT_INC_ATOMIC(net, invalid); 1300 NF_CT_STAT_INC_ATOMIC(net, invalid);
1301 ret = -ret; 1301 ret = -ret;
1302 goto out; 1302 goto out;
1303 } 1303 }
1304 /* ICMP[v6] protocol trackers may assign one conntrack. */ 1304 /* ICMP[v6] protocol trackers may assign one conntrack. */
1305 if (skb->nfct) 1305 if (skb->nfct)
1306 goto out; 1306 goto out;
1307 } 1307 }
1308 1308 repeat:
1309 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 1309 ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
1310 l3proto, l4proto, &set_reply, &ctinfo); 1310 l3proto, l4proto, &set_reply, &ctinfo);
1311 if (!ct) { 1311 if (!ct) {
1312 /* Not valid part of a connection */ 1312 /* Not valid part of a connection */
1313 NF_CT_STAT_INC_ATOMIC(net, invalid); 1313 NF_CT_STAT_INC_ATOMIC(net, invalid);
1314 ret = NF_ACCEPT; 1314 ret = NF_ACCEPT;
1315 goto out; 1315 goto out;
1316 } 1316 }
1317 1317
1318 if (IS_ERR(ct)) { 1318 if (IS_ERR(ct)) {
1319 /* Too stressed to deal. */ 1319 /* Too stressed to deal. */
1320 NF_CT_STAT_INC_ATOMIC(net, drop); 1320 NF_CT_STAT_INC_ATOMIC(net, drop);
1321 ret = NF_DROP; 1321 ret = NF_DROP;
1322 goto out; 1322 goto out;
1323 } 1323 }
1324 1324
1325 NF_CT_ASSERT(skb->nfct); 1325 NF_CT_ASSERT(skb->nfct);
1326 1326
1327 /* Decide what timeout policy we want to apply to this flow. */ 1327 /* Decide what timeout policy we want to apply to this flow. */
1328 timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 1328 timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
1329 1329
1330 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); 1330 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
1331 if (ret <= 0) { 1331 if (ret <= 0) {
1332 /* Invalid: inverse of the return code tells 1332 /* Invalid: inverse of the return code tells
1333 * the netfilter core what to do */ 1333 * the netfilter core what to do */
1334 pr_debug("nf_conntrack_in: Can't track with proto module\n"); 1334 pr_debug("nf_conntrack_in: Can't track with proto module\n");
1335 nf_conntrack_put(skb->nfct); 1335 nf_conntrack_put(skb->nfct);
1336 skb->nfct = NULL; 1336 skb->nfct = NULL;
1337 NF_CT_STAT_INC_ATOMIC(net, invalid); 1337 NF_CT_STAT_INC_ATOMIC(net, invalid);
1338 if (ret == -NF_DROP) 1338 if (ret == -NF_DROP)
1339 NF_CT_STAT_INC_ATOMIC(net, drop); 1339 NF_CT_STAT_INC_ATOMIC(net, drop);
1340 ret = -ret; 1340 ret = -ret;
1341 goto out; 1341 goto out;
1342 } 1342 }
1343 1343
1344 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 1344 if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
1345 nf_conntrack_event_cache(IPCT_REPLY, ct); 1345 nf_conntrack_event_cache(IPCT_REPLY, ct);
1346 out: 1346 out:
1347 if (tmpl) { 1347 if (tmpl) {
1348 /* Special case: we have to repeat this hook, assign the 1348 /* Special case: TCP tracker reports an attempt to reopen a
1349 * template again to this packet. We assume that this packet 1349 * closed/aborted connection. We have to go back and create a
1350 * has no conntrack assigned. This is used by nf_ct_tcp. */ 1350 * fresh conntrack.
1351 */
1351 if (ret == NF_REPEAT) 1352 if (ret == NF_REPEAT)
1352 skb->nfct = (struct nf_conntrack *)tmpl; 1353 goto repeat;
1353 else 1354 else
1354 nf_ct_put(tmpl); 1355 nf_ct_put(tmpl);
1355 } 1356 }
1356 1357
1357 return ret; 1358 return ret;
1358 } 1359 }
1359 EXPORT_SYMBOL_GPL(nf_conntrack_in); 1360 EXPORT_SYMBOL_GPL(nf_conntrack_in);
1360 1361
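The hunk above is the core of this commit: when the TCP tracker asks for the packet to be re-evaluated (a new SYN arriving on a connection it just killed), nf_conntrack_in() no longer re-attaches the template and hands NF_REPEAT back to the hook core; it jumps back to the new repeat: label and runs resolve_normal_ct() again so a fresh conntrack is attached. A compact user-space model of the new control flow, with stand-in verdict values and a stubbed tracker (not kernel API):

#include <stdio.h>

enum verdict { VERDICT_DROP, VERDICT_ACCEPT, VERDICT_REPEAT };

/* Stub for l4proto->packet(): pretend the first pass hits the TCP
 * "reopen a closed/aborted connection" case and requests a repeat. */
static enum verdict track_packet(int pass)
{
	return pass == 0 ? VERDICT_REPEAT : VERDICT_ACCEPT;
}

static enum verdict conntrack_in_model(void)
{
	int pass = 0;
	enum verdict ret;
repeat:
	/* resolve_normal_ct() would run here and attach a fresh conntrack. */
	ret = track_packet(pass++);
	if (ret == VERDICT_REPEAT)
		goto repeat;	/* handled locally; the core hook path never sees it */
	return ret;
}

int main(void)
{
	printf("final verdict: %s\n",
	       conntrack_in_model() == VERDICT_ACCEPT ? "accept" : "other");
	return 0;
}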
1361 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, 1362 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
1362 const struct nf_conntrack_tuple *orig) 1363 const struct nf_conntrack_tuple *orig)
1363 { 1364 {
1364 bool ret; 1365 bool ret;
1365 1366
1366 rcu_read_lock(); 1367 rcu_read_lock();
1367 ret = nf_ct_invert_tuple(inverse, orig, 1368 ret = nf_ct_invert_tuple(inverse, orig,
1368 __nf_ct_l3proto_find(orig->src.l3num), 1369 __nf_ct_l3proto_find(orig->src.l3num),
1369 __nf_ct_l4proto_find(orig->src.l3num, 1370 __nf_ct_l4proto_find(orig->src.l3num,
1370 orig->dst.protonum)); 1371 orig->dst.protonum));
1371 rcu_read_unlock(); 1372 rcu_read_unlock();
1372 return ret; 1373 return ret;
1373 } 1374 }
1374 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); 1375 EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
1375 1376
1376 /* Alter reply tuple (maybe alter helper). This is for NAT, and is 1377 /* Alter reply tuple (maybe alter helper). This is for NAT, and is
1377 implicitly racy: see __nf_conntrack_confirm */ 1378 implicitly racy: see __nf_conntrack_confirm */
1378 void nf_conntrack_alter_reply(struct nf_conn *ct, 1379 void nf_conntrack_alter_reply(struct nf_conn *ct,
1379 const struct nf_conntrack_tuple *newreply) 1380 const struct nf_conntrack_tuple *newreply)
1380 { 1381 {
1381 struct nf_conn_help *help = nfct_help(ct); 1382 struct nf_conn_help *help = nfct_help(ct);
1382 1383
1383 /* Should be unconfirmed, so not in hash table yet */ 1384 /* Should be unconfirmed, so not in hash table yet */
1384 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 1385 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
1385 1386
1386 pr_debug("Altering reply tuple of %p to ", ct); 1387 pr_debug("Altering reply tuple of %p to ", ct);
1387 nf_ct_dump_tuple(newreply); 1388 nf_ct_dump_tuple(newreply);
1388 1389
1389 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 1390 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1390 if (ct->master || (help && !hlist_empty(&help->expectations))) 1391 if (ct->master || (help && !hlist_empty(&help->expectations)))
1391 return; 1392 return;
1392 1393
1393 rcu_read_lock(); 1394 rcu_read_lock();
1394 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); 1395 __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
1395 rcu_read_unlock(); 1396 rcu_read_unlock();
1396 } 1397 }
1397 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); 1398 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
1398 1399
1399 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ 1400 /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
1400 void __nf_ct_refresh_acct(struct nf_conn *ct, 1401 void __nf_ct_refresh_acct(struct nf_conn *ct,
1401 enum ip_conntrack_info ctinfo, 1402 enum ip_conntrack_info ctinfo,
1402 const struct sk_buff *skb, 1403 const struct sk_buff *skb,
1403 unsigned long extra_jiffies, 1404 unsigned long extra_jiffies,
1404 int do_acct) 1405 int do_acct)
1405 { 1406 {
1406 NF_CT_ASSERT(skb); 1407 NF_CT_ASSERT(skb);
1407 1408
1408 /* Only update if this is not a fixed timeout */ 1409 /* Only update if this is not a fixed timeout */
1409 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) 1410 if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
1410 goto acct; 1411 goto acct;
1411 1412
1412 /* If not in hash table, timer will not be active yet */ 1413 /* If not in hash table, timer will not be active yet */
1413 if (nf_ct_is_confirmed(ct)) 1414 if (nf_ct_is_confirmed(ct))
1414 extra_jiffies += nfct_time_stamp; 1415 extra_jiffies += nfct_time_stamp;
1415 1416
1416 ct->timeout = extra_jiffies; 1417 ct->timeout = extra_jiffies;
1417 acct: 1418 acct:
1418 if (do_acct) 1419 if (do_acct)
1419 nf_ct_acct_update(ct, ctinfo, skb->len); 1420 nf_ct_acct_update(ct, ctinfo, skb->len);
1420 } 1421 }
1421 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); 1422 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
1422 1423
1423 bool nf_ct_kill_acct(struct nf_conn *ct, 1424 bool nf_ct_kill_acct(struct nf_conn *ct,
1424 enum ip_conntrack_info ctinfo, 1425 enum ip_conntrack_info ctinfo,
1425 const struct sk_buff *skb) 1426 const struct sk_buff *skb)
1426 { 1427 {
1427 nf_ct_acct_update(ct, ctinfo, skb->len); 1428 nf_ct_acct_update(ct, ctinfo, skb->len);
1428 1429
1429 return nf_ct_delete(ct, 0, 0); 1430 return nf_ct_delete(ct, 0, 0);
1430 } 1431 }
1431 EXPORT_SYMBOL_GPL(nf_ct_kill_acct); 1432 EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
1432 1433
1433 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 1434 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1434 1435
1435 #include <linux/netfilter/nfnetlink.h> 1436 #include <linux/netfilter/nfnetlink.h>
1436 #include <linux/netfilter/nfnetlink_conntrack.h> 1437 #include <linux/netfilter/nfnetlink_conntrack.h>
1437 #include <linux/mutex.h> 1438 #include <linux/mutex.h>
1438 1439
1439 /* Generic function for tcp/udp/sctp/dccp and the like. This needs to be 1440 /* Generic function for tcp/udp/sctp/dccp and the like. This needs to be
1440 * in ip_conntrack_core, since we don't want the protocols to autoload 1441 * in ip_conntrack_core, since we don't want the protocols to autoload
1441 * or depend on ctnetlink */ 1442 * or depend on ctnetlink */
1442 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, 1443 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
1443 const struct nf_conntrack_tuple *tuple) 1444 const struct nf_conntrack_tuple *tuple)
1444 { 1445 {
1445 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || 1446 if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) ||
1446 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) 1447 nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port))
1447 goto nla_put_failure; 1448 goto nla_put_failure;
1448 return 0; 1449 return 0;
1449 1450
1450 nla_put_failure: 1451 nla_put_failure:
1451 return -1; 1452 return -1;
1452 } 1453 }
1453 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); 1454 EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);
1454 1455
1455 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { 1456 const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
1456 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, 1457 [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 },
1457 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, 1458 [CTA_PROTO_DST_PORT] = { .type = NLA_U16 },
1458 }; 1459 };
1459 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); 1460 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
1460 1461
1461 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], 1462 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
1462 struct nf_conntrack_tuple *t) 1463 struct nf_conntrack_tuple *t)
1463 { 1464 {
1464 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT]) 1465 if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
1465 return -EINVAL; 1466 return -EINVAL;
1466 1467
1467 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); 1468 t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
1468 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); 1469 t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
1469 1470
1470 return 0; 1471 return 0;
1471 } 1472 }
1472 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 1473 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
1473 1474
1474 int nf_ct_port_nlattr_tuple_size(void) 1475 int nf_ct_port_nlattr_tuple_size(void)
1475 { 1476 {
1476 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); 1477 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1477 } 1478 }
1478 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); 1479 EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
1479 #endif 1480 #endif
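
The shared netlink helpers above carry the source and destination ports as 16-bit attributes in network byte order. A small userspace sketch of that big-endian round trip; struct port_attrs is purely illustrative and is not the kernel's nlattr layout:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Two 16-bit port attributes kept in network byte order, as the
 * CTA_PROTO_SRC_PORT/CTA_PROTO_DST_PORT payloads are. */
struct port_attrs {
	uint16_t src_be;
	uint16_t dst_be;
};

static void put_ports(struct port_attrs *a, uint16_t src, uint16_t dst)
{
	a->src_be = htons(src);	/* store big-endian on the "wire" */
	a->dst_be = htons(dst);
}

static void get_ports(const struct port_attrs *a,
		      uint16_t *src, uint16_t *dst)
{
	*src = ntohs(a->src_be);	/* convert back at the edge */
	*dst = ntohs(a->dst_be);
}

int main(void)
{
	struct port_attrs a;
	uint16_t s, d;

	put_ports(&a, 21, 54321);
	get_ports(&a, &s, &d);
	printf("src=%u dst=%u\n", s, d);
	return 0;
}
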
1480 1481
1481 /* Used by ipt_REJECT and ip6t_REJECT. */ 1482 /* Used by ipt_REJECT and ip6t_REJECT. */
1482 static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) 1483 static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
1483 { 1484 {
1484 struct nf_conn *ct; 1485 struct nf_conn *ct;
1485 enum ip_conntrack_info ctinfo; 1486 enum ip_conntrack_info ctinfo;
1486 1487
1487 /* This ICMP is in reverse direction to the packet which caused it */ 1488 /* This ICMP is in reverse direction to the packet which caused it */
1488 ct = nf_ct_get(skb, &ctinfo); 1489 ct = nf_ct_get(skb, &ctinfo);
1489 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 1490 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1490 ctinfo = IP_CT_RELATED_REPLY; 1491 ctinfo = IP_CT_RELATED_REPLY;
1491 else 1492 else
1492 ctinfo = IP_CT_RELATED; 1493 ctinfo = IP_CT_RELATED;
1493 1494
1494 /* Attach to new skbuff, and increment count */ 1495 /* Attach to new skbuff, and increment count */
1495 nskb->nfct = &ct->ct_general; 1496 nskb->nfct = &ct->ct_general;
1496 nskb->nfctinfo = ctinfo; 1497 nskb->nfctinfo = ctinfo;
1497 nf_conntrack_get(nskb->nfct); 1498 nf_conntrack_get(nskb->nfct);
1498 } 1499 }
1499 1500
1500 /* Bring out ya dead! */ 1501 /* Bring out ya dead! */
1501 static struct nf_conn * 1502 static struct nf_conn *
1502 get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), 1503 get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1503 void *data, unsigned int *bucket) 1504 void *data, unsigned int *bucket)
1504 { 1505 {
1505 struct nf_conntrack_tuple_hash *h; 1506 struct nf_conntrack_tuple_hash *h;
1506 struct nf_conn *ct; 1507 struct nf_conn *ct;
1507 struct hlist_nulls_node *n; 1508 struct hlist_nulls_node *n;
1508 int cpu; 1509 int cpu;
1509 spinlock_t *lockp; 1510 spinlock_t *lockp;
1510 1511
1511 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { 1512 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
1512 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; 1513 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1513 local_bh_disable(); 1514 local_bh_disable();
1514 nf_conntrack_lock(lockp); 1515 nf_conntrack_lock(lockp);
1515 if (*bucket < nf_conntrack_htable_size) { 1516 if (*bucket < nf_conntrack_htable_size) {
1516 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { 1517 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
1517 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1518 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
1518 continue; 1519 continue;
1519 ct = nf_ct_tuplehash_to_ctrack(h); 1520 ct = nf_ct_tuplehash_to_ctrack(h);
1520 if (net_eq(nf_ct_net(ct), net) && 1521 if (net_eq(nf_ct_net(ct), net) &&
1521 iter(ct, data)) 1522 iter(ct, data))
1522 goto found; 1523 goto found;
1523 } 1524 }
1524 } 1525 }
1525 spin_unlock(lockp); 1526 spin_unlock(lockp);
1526 local_bh_enable(); 1527 local_bh_enable();
1527 cond_resched(); 1528 cond_resched();
1528 } 1529 }
1529 1530
1530 for_each_possible_cpu(cpu) { 1531 for_each_possible_cpu(cpu) {
1531 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); 1532 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1532 1533
1533 spin_lock_bh(&pcpu->lock); 1534 spin_lock_bh(&pcpu->lock);
1534 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) { 1535 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
1535 ct = nf_ct_tuplehash_to_ctrack(h); 1536 ct = nf_ct_tuplehash_to_ctrack(h);
1536 if (iter(ct, data)) 1537 if (iter(ct, data))
1537 set_bit(IPS_DYING_BIT, &ct->status); 1538 set_bit(IPS_DYING_BIT, &ct->status);
1538 } 1539 }
1539 spin_unlock_bh(&pcpu->lock); 1540 spin_unlock_bh(&pcpu->lock);
1540 cond_resched(); 1541 cond_resched();
1541 } 1542 }
1542 return NULL; 1543 return NULL;
1543 found: 1544 found:
1544 atomic_inc(&ct->ct_general.use); 1545 atomic_inc(&ct->ct_general.use);
1545 spin_unlock(lockp); 1546 spin_unlock(lockp);
1546 local_bh_enable(); 1547 local_bh_enable();
1547 return ct; 1548 return ct;
1548 } 1549 }
1549 1550
1550 void nf_ct_iterate_cleanup(struct net *net, 1551 void nf_ct_iterate_cleanup(struct net *net,
1551 int (*iter)(struct nf_conn *i, void *data), 1552 int (*iter)(struct nf_conn *i, void *data),
1552 void *data, u32 portid, int report) 1553 void *data, u32 portid, int report)
1553 { 1554 {
1554 struct nf_conn *ct; 1555 struct nf_conn *ct;
1555 unsigned int bucket = 0; 1556 unsigned int bucket = 0;
1556 1557
1557 might_sleep(); 1558 might_sleep();
1558 1559
1559 if (atomic_read(&net->ct.count) == 0) 1560 if (atomic_read(&net->ct.count) == 0)
1560 return; 1561 return;
1561 1562
1562 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1563 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1563 /* Time to push up daisies... */ 1564 /* Time to push up daisies... */
1564 1565
1565 nf_ct_delete(ct, portid, report); 1566 nf_ct_delete(ct, portid, report);
1566 nf_ct_put(ct); 1567 nf_ct_put(ct);
1567 cond_resched(); 1568 cond_resched();
1568 } 1569 }
1569 } 1570 }
1570 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1571 EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
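
get_next_corpse() and nf_ct_iterate_cleanup() sweep every hash bucket, hand each original-direction entry to the caller's iter(ct, data) predicate, and delete the entries it selects. A single-threaded userspace sketch of that callback-driven sweep, with toy types and none of the locking or reference counting:

#include <stdio.h>
#include <stdlib.h>

struct conn {
	int id;
	struct conn *next;
};

#define HSIZE 8
static struct conn *hash[HSIZE];

/* Walk all buckets and delete every entry the caller-supplied
 * predicate selects, mirroring the iter(ct, data) contract. */
static void iterate_cleanup(int (*iter)(struct conn *c, void *data),
			    void *data)
{
	for (unsigned int b = 0; b < HSIZE; b++) {
		struct conn **pp = &hash[b];

		while (*pp) {
			struct conn *c = *pp;

			if (iter(c, data)) {
				*pp = c->next;	/* unlink and release */
				free(c);
			} else {
				pp = &c->next;
			}
		}
	}
}

/* Unconditional predicate, like the kill_all() helper in the diff. */
static int drop_all(struct conn *c, void *data)
{
	(void)c;
	(void)data;
	return 1;
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct conn *c = malloc(sizeof(*c));

		c->id = i;
		c->next = hash[i % HSIZE];
		hash[i % HSIZE] = c;
	}
	iterate_cleanup(drop_all, NULL);
	printf("table emptied\n");
	return 0;
}
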
1571 1572
1572 static int kill_all(struct nf_conn *i, void *data) 1573 static int kill_all(struct nf_conn *i, void *data)
1573 { 1574 {
1574 return 1; 1575 return 1;
1575 } 1576 }
1576 1577
1577 void nf_ct_free_hashtable(void *hash, unsigned int size) 1578 void nf_ct_free_hashtable(void *hash, unsigned int size)
1578 { 1579 {
1579 if (is_vmalloc_addr(hash)) 1580 if (is_vmalloc_addr(hash))
1580 vfree(hash); 1581 vfree(hash);
1581 else 1582 else
1582 free_pages((unsigned long)hash, 1583 free_pages((unsigned long)hash,
1583 get_order(sizeof(struct hlist_head) * size)); 1584 get_order(sizeof(struct hlist_head) * size));
1584 } 1585 }
1585 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1586 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1586 1587
1587 static int untrack_refs(void) 1588 static int untrack_refs(void)
1588 { 1589 {
1589 int cnt = 0, cpu; 1590 int cnt = 0, cpu;
1590 1591
1591 for_each_possible_cpu(cpu) { 1592 for_each_possible_cpu(cpu) {
1592 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); 1593 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1593 1594
1594 cnt += atomic_read(&ct->ct_general.use) - 1; 1595 cnt += atomic_read(&ct->ct_general.use) - 1;
1595 } 1596 }
1596 return cnt; 1597 return cnt;
1597 } 1598 }
1598 1599
1599 void nf_conntrack_cleanup_start(void) 1600 void nf_conntrack_cleanup_start(void)
1600 { 1601 {
1601 conntrack_gc_work.exiting = true; 1602 conntrack_gc_work.exiting = true;
1602 RCU_INIT_POINTER(ip_ct_attach, NULL); 1603 RCU_INIT_POINTER(ip_ct_attach, NULL);
1603 } 1604 }
1604 1605
1605 void nf_conntrack_cleanup_end(void) 1606 void nf_conntrack_cleanup_end(void)
1606 { 1607 {
1607 RCU_INIT_POINTER(nf_ct_destroy, NULL); 1608 RCU_INIT_POINTER(nf_ct_destroy, NULL);
1608 while (untrack_refs() > 0) 1609 while (untrack_refs() > 0)
1609 schedule(); 1610 schedule();
1610 1611
1611 cancel_delayed_work_sync(&conntrack_gc_work.dwork); 1612 cancel_delayed_work_sync(&conntrack_gc_work.dwork);
1612 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 1613 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1613 1614
1614 nf_conntrack_proto_fini(); 1615 nf_conntrack_proto_fini();
1615 nf_conntrack_seqadj_fini(); 1616 nf_conntrack_seqadj_fini();
1616 nf_conntrack_labels_fini(); 1617 nf_conntrack_labels_fini();
1617 nf_conntrack_helper_fini(); 1618 nf_conntrack_helper_fini();
1618 nf_conntrack_timeout_fini(); 1619 nf_conntrack_timeout_fini();
1619 nf_conntrack_ecache_fini(); 1620 nf_conntrack_ecache_fini();
1620 nf_conntrack_tstamp_fini(); 1621 nf_conntrack_tstamp_fini();
1621 nf_conntrack_acct_fini(); 1622 nf_conntrack_acct_fini();
1622 nf_conntrack_expect_fini(); 1623 nf_conntrack_expect_fini();
1623 1624
1624 kmem_cache_destroy(nf_conntrack_cachep); 1625 kmem_cache_destroy(nf_conntrack_cachep);
1625 } 1626 }
1626 1627
1627 /* 1628 /*
1628 * Mishearing the voices in his head, our hero wonders how he's 1629 * Mishearing the voices in his head, our hero wonders how he's
1629 * supposed to kill the mall. 1630 * supposed to kill the mall.
1630 */ 1631 */
1631 void nf_conntrack_cleanup_net(struct net *net) 1632 void nf_conntrack_cleanup_net(struct net *net)
1632 { 1633 {
1633 LIST_HEAD(single); 1634 LIST_HEAD(single);
1634 1635
1635 list_add(&net->exit_list, &single); 1636 list_add(&net->exit_list, &single);
1636 nf_conntrack_cleanup_net_list(&single); 1637 nf_conntrack_cleanup_net_list(&single);
1637 } 1638 }
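
nf_conntrack_cleanup_net() reuses the batched teardown path by putting the single namespace on a temporary one-entry list. A small sketch of that wrap-one-item-and-call-the-list-variant pattern, using a toy singly linked list in place of list_head:

#include <stdio.h>

struct node {
	const char *name;
	struct node *next;
};

/* Batch API: tears down every entry on the list. */
static void cleanup_list(struct node *head)
{
	for (struct node *n = head; n; n = n->next)
		printf("cleaning up %s\n", n->name);
}

/* Single-entry API layered on the batch one, like wrapping one net
 * in LIST_HEAD(single) before calling the _net_list() variant. */
static void cleanup_one(struct node *n)
{
	n->next = NULL;		/* a one-element "list" */
	cleanup_list(n);
}

int main(void)
{
	struct node n = { .name = "netns0" };

	cleanup_one(&n);
	return 0;
}
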
1638 1639
1639 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) 1640 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1640 { 1641 {
1641 int busy; 1642 int busy;
1642 struct net *net; 1643 struct net *net;
1643 1644
1644 /* 1645 /*
1645 * This makes sure all current packets have passed through 1646 * This makes sure all current packets have passed through
1646 * netfilter framework. Roll on, two-stage module 1647 * netfilter framework. Roll on, two-stage module
1647 * delete... 1648 * delete...
1648 */ 1649 */
1649 synchronize_net(); 1650 synchronize_net();
1650 i_see_dead_people: 1651 i_see_dead_people:
1651 busy = 0; 1652 busy = 0;
1652 list_for_each_entry(net, net_exit_list, exit_list) { 1653 list_for_each_entry(net, net_exit_list, exit_list) {
1653 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0); 1654 nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
1654 if (atomic_read(&net->ct.count) != 0) 1655 if (atomic_read(&net->ct.count) != 0)
1655 busy = 1; 1656 busy = 1;
1656 } 1657 }
1657 if (busy) { 1658 if (busy) {
1658 schedule(); 1659 schedule();
1659 goto i_see_dead_people; 1660 goto i_see_dead_people;
1660 } 1661 }
1661 1662
1662 list_for_each_entry(net, net_exit_list, exit_list) { 1663 list_for_each_entry(net, net_exit_list, exit_list) {
1663 nf_conntrack_proto_pernet_fini(net); 1664 nf_conntrack_proto_pernet_fini(net);
1664 nf_conntrack_helper_pernet_fini(net); 1665 nf_conntrack_helper_pernet_fini(net);
1665 nf_conntrack_ecache_pernet_fini(net); 1666 nf_conntrack_ecache_pernet_fini(net);
1666 nf_conntrack_tstamp_pernet_fini(net); 1667 nf_conntrack_tstamp_pernet_fini(net);
1667 nf_conntrack_acct_pernet_fini(net); 1668 nf_conntrack_acct_pernet_fini(net);
1668 nf_conntrack_expect_pernet_fini(net); 1669 nf_conntrack_expect_pernet_fini(net);
1669 free_percpu(net->ct.stat); 1670 free_percpu(net->ct.stat);
1670 free_percpu(net->ct.pcpu_lists); 1671 free_percpu(net->ct.pcpu_lists);
1671 } 1672 }
1672 } 1673 }
1673 1674
1674 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) 1675 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
1675 { 1676 {
1676 struct hlist_nulls_head *hash; 1677 struct hlist_nulls_head *hash;
1677 unsigned int nr_slots, i; 1678 unsigned int nr_slots, i;
1678 size_t sz; 1679 size_t sz;
1679 1680
1680 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) 1681 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
1681 return NULL; 1682 return NULL;
1682 1683
1683 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1684 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1684 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 1685 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1685 1686
1686 if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head))) 1687 if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
1687 return NULL; 1688 return NULL;
1688 1689
1689 sz = nr_slots * sizeof(struct hlist_nulls_head); 1690 sz = nr_slots * sizeof(struct hlist_nulls_head);
1690 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1691 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1691 get_order(sz)); 1692 get_order(sz));
1692 if (!hash) 1693 if (!hash)
1693 hash = vzalloc(sz); 1694 hash = vzalloc(sz);
1694 1695
1695 if (hash && nulls) 1696 if (hash && nulls)
1696 for (i = 0; i < nr_slots; i++) 1697 for (i = 0; i < nr_slots; i++)
1697 INIT_HLIST_NULLS_HEAD(&hash[i], i); 1698 INIT_HLIST_NULLS_HEAD(&hash[i], i);
1698 1699
1699 return hash; 1700 return hash;
1700 } 1701 }
1701 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); 1702 EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
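
nf_ct_alloc_hashtable() guards against overflow, rounds the bucket count up to a whole number of pages worth of bucket heads, and falls back from contiguous pages to vzalloc(); nf_ct_free_hashtable() above picks the matching release path. A userspace sketch of the sizing step, assuming 4 KiB pages, with calloc standing in for both kernel allocators:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/* Round x up to the next multiple of step (step must be non-zero). */
static unsigned long roundup_ul(unsigned long x, unsigned long step)
{
	return ((x + step - 1) / step) * step;
}

/* Reject counts that would overflow, round the bucket count up to a
 * whole number of pages worth of bucket heads, then hand back zeroed
 * storage (calloc replaces the pages-then-vzalloc fallback pair). */
static void *alloc_hashtable(unsigned int *sizep, size_t slot_size)
{
	unsigned long nr_slots;

	if (*sizep > UINT_MAX / slot_size)
		return NULL;

	nr_slots = roundup_ul(*sizep, PAGE_SIZE / slot_size);
	*sizep = (unsigned int)nr_slots;

	return calloc(nr_slots, slot_size);
}

int main(void)
{
	unsigned int size = 1000;
	void *hash = alloc_hashtable(&size, sizeof(void *));

	/* With 4096-byte pages and 8-byte heads, 512 slots fit per page,
	 * so a request for 1000 buckets is rounded up to 1024. */
	printf("allocated %u buckets at %p\n", size, hash);
	free(hash);
	return 0;
}
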
1702 1703
1703 int nf_conntrack_hash_resize(unsigned int hashsize) 1704 int nf_conntrack_hash_resize(unsigned int hashsize)
1704 { 1705 {
1705 int i, bucket; 1706 int i, bucket;
1706 unsigned int old_size; 1707 unsigned int old_size;
1707 struct hlist_nulls_head *hash, *old_hash; 1708 struct hlist_nulls_head *hash, *old_hash;
1708 struct nf_conntrack_tuple_hash *h; 1709 struct nf_conntrack_tuple_hash *h;
1709 struct nf_conn *ct; 1710 struct nf_conn *ct;
1710 1711
1711 if (!hashsize) 1712 if (!hashsize)
1712 return -EINVAL; 1713 return -EINVAL;
1713 1714
1714 hash = nf_ct_alloc_hashtable(&hashsize, 1); 1715 hash = nf_ct_alloc_hashtable(&hashsize, 1);
1715 if (!hash) 1716 if (!hash)
1716 return -ENOMEM; 1717 return -ENOMEM;
1717 1718
1718 old_size = nf_conntrack_htable_size; 1719 old_size = nf_conntrack_htable_size;
1719 if (old_size == hashsize) { 1720 if (old_size == hashsize) {
1720 nf_ct_free_hashtable(hash, hashsize); 1721 nf_ct_free_hashtable(hash, hashsize);
1721 return 0; 1722 return 0;
1722 } 1723 }
1723 1724
1724 local_bh_disable(); 1725 local_bh_disable();
1725 nf_conntrack_all_lock(); 1726 nf_conntrack_all_lock();
1726 write_seqcount_begin(&nf_conntrack_generation); 1727 write_seqcount_begin(&nf_conntrack_generation);
1727 1728
1728 /* Lookups in the old hash might happen in parallel, which means we 1729 /* Lookups in the old hash might happen in parallel, which means we
1729 * might get false negatives during connection lookup. New connections 1730 * might get false negatives during connection lookup. New connections
1730 * created because of a false negative won't make it into the hash 1731 * created because of a false negative won't make it into the hash
1731 * though since that required taking the locks. 1732 * though since that required taking the locks.
1732 */ 1733 */
1733 1734
1734 for (i = 0; i < nf_conntrack_htable_size; i++) { 1735 for (i = 0; i < nf_conntrack_htable_size; i++) {
1735 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { 1736 while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
1736 h = hlist_nulls_entry(nf_conntrack_hash[i].first, 1737 h = hlist_nulls_entry(nf_conntrack_hash[i].first,
1737 struct nf_conntrack_tuple_hash, hnnode); 1738 struct nf_conntrack_tuple_hash, hnnode);
1738 ct = nf_ct_tuplehash_to_ctrack(h); 1739 ct = nf_ct_tuplehash_to_ctrack(h);
1739 hlist_nulls_del_rcu(&h->hnnode); 1740 hlist_nulls_del_rcu(&h->hnnode);
1740 bucket = __hash_conntrack(nf_ct_net(ct), 1741 bucket = __hash_conntrack(nf_ct_net(ct),
1741 &h->tuple, hashsize); 1742 &h->tuple, hashsize);
1742 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1743 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1743 } 1744 }
1744 } 1745 }
1745 old_size = nf_conntrack_htable_size; 1746 old_size = nf_conntrack_htable_size;
1746 old_hash = nf_conntrack_hash; 1747 old_hash = nf_conntrack_hash;
1747 1748
1748 nf_conntrack_hash = hash; 1749 nf_conntrack_hash = hash;
1749 nf_conntrack_htable_size = hashsize; 1750 nf_conntrack_htable_size = hashsize;
1750 1751
1751 write_seqcount_end(&nf_conntrack_generation); 1752 write_seqcount_end(&nf_conntrack_generation);
1752 nf_conntrack_all_unlock(); 1753 nf_conntrack_all_unlock();
1753 local_bh_enable(); 1754 local_bh_enable();
1754 1755
1755 synchronize_net(); 1756 synchronize_net();
1756 nf_ct_free_hashtable(old_hash, old_size); 1757 nf_ct_free_hashtable(old_hash, old_size);
1757 return 0; 1758 return 0;
1758 } 1759 }
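
The resize allocates a fresh bucket array, unlinks each entry from the old table, recomputes its bucket against the new size, relinks it, and only then swaps the table pointer and size. A minimal userspace sketch of that move-and-rehash loop on a chained table; the modulo hash is a placeholder for __hash_conntrack(), and there is no locking or RCU here:

#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned int key;
	struct entry *next;
};

struct table {
	struct entry **buckets;
	unsigned int size;
};

static unsigned int hash_key(unsigned int key, unsigned int size)
{
	return key % size;	/* placeholder hash function */
}

/* Move every entry into a freshly allocated array of new_size
 * buckets, then swap the table in place. */
static int table_resize(struct table *t, unsigned int new_size)
{
	struct entry **nb = calloc(new_size, sizeof(*nb));

	if (!nb)
		return -1;

	for (unsigned int i = 0; i < t->size; i++) {
		while (t->buckets[i]) {
			struct entry *e = t->buckets[i];
			unsigned int b = hash_key(e->key, new_size);

			t->buckets[i] = e->next;	/* unlink from old */
			e->next = nb[b];		/* link into new */
			nb[b] = e;
		}
	}
	free(t->buckets);
	t->buckets = nb;
	t->size = new_size;
	return 0;
}

int main(void)
{
	struct entry e = { .key = 42 };
	struct table t = { .buckets = calloc(4, sizeof(struct entry *)),
			   .size = 4 };

	t.buckets[hash_key(e.key, t.size)] = &e;
	table_resize(&t, 16);
	printf("entry 42 now in bucket %u of %u\n",
	       hash_key(e.key, t.size), t.size);
	free(t.buckets);
	return 0;
}
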
1759 1760
1760 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1761 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1761 { 1762 {
1762 unsigned int hashsize; 1763 unsigned int hashsize;
1763 int rc; 1764 int rc;
1764 1765
1765 if (current->nsproxy->net_ns != &init_net) 1766 if (current->nsproxy->net_ns != &init_net)
1766 return -EOPNOTSUPP; 1767 return -EOPNOTSUPP;
1767 1768
1768 /* On boot, we can set this without any fancy locking. */ 1769 /* On boot, we can set this without any fancy locking. */
1769 if (!nf_conntrack_htable_size) 1770 if (!nf_conntrack_htable_size)
1770 return param_set_uint(val, kp); 1771 return param_set_uint(val, kp);
1771 1772
1772 rc = kstrtouint(val, 0, &hashsize); 1773 rc = kstrtouint(val, 0, &hashsize);
1773 if (rc) 1774 if (rc)
1774 return rc; 1775 return rc;
1775 1776
1776 return nf_conntrack_hash_resize(hashsize); 1777 return nf_conntrack_hash_resize(hashsize);
1777 } 1778 }
1778 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 1779 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1779 1780
1780 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 1781 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1781 &nf_conntrack_htable_size, 0600); 1782 &nf_conntrack_htable_size, 0600);
1782 1783
1783 void nf_ct_untracked_status_or(unsigned long bits) 1784 void nf_ct_untracked_status_or(unsigned long bits)
1784 { 1785 {
1785 int cpu; 1786 int cpu;
1786 1787
1787 for_each_possible_cpu(cpu) 1788 for_each_possible_cpu(cpu)
1788 per_cpu(nf_conntrack_untracked, cpu).status |= bits; 1789 per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1789 } 1790 }
1790 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); 1791 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1791 1792
1792 int nf_conntrack_init_start(void) 1793 int nf_conntrack_init_start(void)
1793 { 1794 {
1794 int max_factor = 8; 1795 int max_factor = 8;
1795 int ret = -ENOMEM; 1796 int ret = -ENOMEM;
1796 int i, cpu; 1797 int i, cpu;
1797 1798
1798 seqcount_init(&nf_conntrack_generation); 1799 seqcount_init(&nf_conntrack_generation);
1799 1800
1800 for (i = 0; i < CONNTRACK_LOCKS; i++) 1801 for (i = 0; i < CONNTRACK_LOCKS; i++)
1801 spin_lock_init(&nf_conntrack_locks[i]); 1802 spin_lock_init(&nf_conntrack_locks[i]);
1802 1803
1803 if (!nf_conntrack_htable_size) { 1804 if (!nf_conntrack_htable_size) {
1804 /* Idea from tcp.c: use 1/16384 of memory. 1805 /* Idea from tcp.c: use 1/16384 of memory.
1805 * On i386: 32MB machine has 512 buckets. 1806 * On i386: 32MB machine has 512 buckets.
1806 * >= 1GB machines have 16384 buckets. 1807 * >= 1GB machines have 16384 buckets.
1807 * >= 4GB machines have 65536 buckets. 1808 * >= 4GB machines have 65536 buckets.
1808 */ 1809 */
1809 nf_conntrack_htable_size 1810 nf_conntrack_htable_size
1810 = (((totalram_pages << PAGE_SHIFT) / 16384) 1811 = (((totalram_pages << PAGE_SHIFT) / 16384)
1811 / sizeof(struct hlist_head)); 1812 / sizeof(struct hlist_head));
1812 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) 1813 if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
1813 nf_conntrack_htable_size = 65536; 1814 nf_conntrack_htable_size = 65536;
1814 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1815 else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
1815 nf_conntrack_htable_size = 16384; 1816 nf_conntrack_htable_size = 16384;
1816 if (nf_conntrack_htable_size < 32) 1817 if (nf_conntrack_htable_size < 32)
1817 nf_conntrack_htable_size = 32; 1818 nf_conntrack_htable_size = 32;
1818 1819
1819 /* Use a max. factor of four by default to get the same max as 1820 /* Use a max. factor of four by default to get the same max as
1820 * with the old struct list_heads. When a table size is given 1821 * with the old struct list_heads. When a table size is given
1821 * we use the old value of 8 to avoid reducing the max. 1822 * we use the old value of 8 to avoid reducing the max.
1822 * entries. */ 1823 * entries. */
1823 max_factor = 4; 1824 max_factor = 4;
1824 } 1825 }
1825 1826
1826 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); 1827 nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
1827 if (!nf_conntrack_hash) 1828 if (!nf_conntrack_hash)
1828 return -ENOMEM; 1829 return -ENOMEM;
1829 1830
1830 nf_conntrack_max = max_factor * nf_conntrack_htable_size; 1831 nf_conntrack_max = max_factor * nf_conntrack_htable_size;
1831 1832
1832 nf_conntrack_cachep = kmem_cache_create("nf_conntrack", 1833 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
1833 sizeof(struct nf_conn), 0, 1834 sizeof(struct nf_conn), 0,
1834 SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); 1835 SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
1835 if (!nf_conntrack_cachep) 1836 if (!nf_conntrack_cachep)
1836 goto err_cachep; 1837 goto err_cachep;
1837 1838
1838 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", 1839 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
1839 NF_CONNTRACK_VERSION, nf_conntrack_htable_size, 1840 NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
1840 nf_conntrack_max); 1841 nf_conntrack_max);
1841 1842
1842 ret = nf_conntrack_expect_init(); 1843 ret = nf_conntrack_expect_init();
1843 if (ret < 0) 1844 if (ret < 0)
1844 goto err_expect; 1845 goto err_expect;
1845 1846
1846 ret = nf_conntrack_acct_init(); 1847 ret = nf_conntrack_acct_init();
1847 if (ret < 0) 1848 if (ret < 0)
1848 goto err_acct; 1849 goto err_acct;
1849 1850
1850 ret = nf_conntrack_tstamp_init(); 1851 ret = nf_conntrack_tstamp_init();
1851 if (ret < 0) 1852 if (ret < 0)
1852 goto err_tstamp; 1853 goto err_tstamp;
1853 1854
1854 ret = nf_conntrack_ecache_init(); 1855 ret = nf_conntrack_ecache_init();
1855 if (ret < 0) 1856 if (ret < 0)
1856 goto err_ecache; 1857 goto err_ecache;
1857 1858
1858 ret = nf_conntrack_timeout_init(); 1859 ret = nf_conntrack_timeout_init();
1859 if (ret < 0) 1860 if (ret < 0)
1860 goto err_timeout; 1861 goto err_timeout;
1861 1862
1862 ret = nf_conntrack_helper_init(); 1863 ret = nf_conntrack_helper_init();
1863 if (ret < 0) 1864 if (ret < 0)
1864 goto err_helper; 1865 goto err_helper;
1865 1866
1866 ret = nf_conntrack_labels_init(); 1867 ret = nf_conntrack_labels_init();
1867 if (ret < 0) 1868 if (ret < 0)
1868 goto err_labels; 1869 goto err_labels;
1869 1870
1870 ret = nf_conntrack_seqadj_init(); 1871 ret = nf_conntrack_seqadj_init();
1871 if (ret < 0) 1872 if (ret < 0)
1872 goto err_seqadj; 1873 goto err_seqadj;
1873 1874
1874 ret = nf_conntrack_proto_init(); 1875 ret = nf_conntrack_proto_init();
1875 if (ret < 0) 1876 if (ret < 0)
1876 goto err_proto; 1877 goto err_proto;
1877 1878
1878 /* Set up fake conntrack: to never be deleted, not in any hashes */ 1879 /* Set up fake conntrack: to never be deleted, not in any hashes */
1879 for_each_possible_cpu(cpu) { 1880 for_each_possible_cpu(cpu) {
1880 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); 1881 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1881 write_pnet(&ct->ct_net, &init_net); 1882 write_pnet(&ct->ct_net, &init_net);
1882 atomic_set(&ct->ct_general.use, 1); 1883 atomic_set(&ct->ct_general.use, 1);
1883 } 1884 }
1884 /* - and make it look like a confirmed connection */ 1885 /* - and make it look like a confirmed connection */
1885 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); 1886 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1886 1887
1887 conntrack_gc_work_init(&conntrack_gc_work); 1888 conntrack_gc_work_init(&conntrack_gc_work);
1888 schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); 1889 schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
1889 1890
1890 return 0; 1891 return 0;
1891 1892
1892 err_proto: 1893 err_proto:
1893 nf_conntrack_seqadj_fini(); 1894 nf_conntrack_seqadj_fini();
1894 err_seqadj: 1895 err_seqadj:
1895 nf_conntrack_labels_fini(); 1896 nf_conntrack_labels_fini();
1896 err_labels: 1897 err_labels:
1897 nf_conntrack_helper_fini(); 1898 nf_conntrack_helper_fini();
1898 err_helper: 1899 err_helper:
1899 nf_conntrack_timeout_fini(); 1900 nf_conntrack_timeout_fini();
1900 err_timeout: 1901 err_timeout:
1901 nf_conntrack_ecache_fini(); 1902 nf_conntrack_ecache_fini();
1902 err_ecache: 1903 err_ecache:
1903 nf_conntrack_tstamp_fini(); 1904 nf_conntrack_tstamp_fini();
1904 err_tstamp: 1905 err_tstamp:
1905 nf_conntrack_acct_fini(); 1906 nf_conntrack_acct_fini();
1906 err_acct: 1907 err_acct:
1907 nf_conntrack_expect_fini(); 1908 nf_conntrack_expect_fini();
1908 err_expect: 1909 err_expect:
1909 kmem_cache_destroy(nf_conntrack_cachep); 1910 kmem_cache_destroy(nf_conntrack_cachep);
1910 err_cachep: 1911 err_cachep:
1911 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 1912 nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
1912 return ret; 1913 return ret;
1913 } 1914 }
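
The default sizing in nf_conntrack_init_start() spends roughly 1/16384 of RAM on bucket heads, plateaus at 16384 buckets past 1 GB and 65536 past 4 GB, clamps the floor at 32, and multiplies by 4 to get nf_conntrack_max (8 when a table size was passed in explicitly). A worked example of that arithmetic, assuming 4 KiB pages and 8-byte hlist heads (so a 32 MB box lands on 256 buckets rather than the 512 the i386 comment cites):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define HEAD_SIZE	8UL	/* sizeof(struct hlist_head), 64-bit */

/* Reproduce the default bucket-count heuristic for a given amount of
 * RAM in pages, including the 1 GB / 4 GB plateaus and the floor. */
static unsigned int default_buckets(unsigned long totalram_pages)
{
	unsigned long buckets =
		(totalram_pages * PAGE_SIZE / 16384) / HEAD_SIZE;

	if (totalram_pages > 4UL * (1024 * 1024 * 1024 / PAGE_SIZE))
		buckets = 65536;
	else if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
		buckets = 16384;
	if (buckets < 32)
		buckets = 32;
	return (unsigned int)buckets;
}

int main(void)
{
	unsigned long ram_32mb = 32UL * 1024 * 1024 / PAGE_SIZE;
	unsigned int b = default_buckets(ram_32mb);

	/* 32 MB / 16384 / 8 = 256 buckets; max = 4 * 256 = 1024. */
	printf("32MB machine: %u buckets, max %u entries\n", b, 4 * b);
	return 0;
}
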
1914 1915
1915 void nf_conntrack_init_end(void) 1916 void nf_conntrack_init_end(void)
1916 { 1917 {
1917 /* For use by REJECT target */ 1918 /* For use by REJECT target */
1918 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach); 1919 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1919 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack); 1920 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1920 } 1921 }
1921 1922
1922 /* 1923 /*
1923 * We need to use special "null" values, not used in hash table 1924 * We need to use special "null" values, not used in hash table
1924 */ 1925 */
1925 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) 1926 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
1926 #define DYING_NULLS_VAL ((1<<30)+1) 1927 #define DYING_NULLS_VAL ((1<<30)+1)
1927 #define TEMPLATE_NULLS_VAL ((1<<30)+2) 1928 #define TEMPLATE_NULLS_VAL ((1<<30)+2)
1928 1929
1929 int nf_conntrack_init_net(struct net *net) 1930 int nf_conntrack_init_net(struct net *net)
1930 { 1931 {
1931 int ret = -ENOMEM; 1932 int ret = -ENOMEM;
1932 int cpu; 1933 int cpu;
1933 1934
1934 atomic_set(&net->ct.count, 0); 1935 atomic_set(&net->ct.count, 0);
1935 1936
1936 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu); 1937 net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
1937 if (!net->ct.pcpu_lists) 1938 if (!net->ct.pcpu_lists)
1938 goto err_stat; 1939 goto err_stat;
1939 1940
1940 for_each_possible_cpu(cpu) { 1941 for_each_possible_cpu(cpu) {
1941 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); 1942 struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
1942 1943
1943 spin_lock_init(&pcpu->lock); 1944 spin_lock_init(&pcpu->lock);
1944 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); 1945 INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
1945 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); 1946 INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
1946 } 1947 }
1947 1948
1948 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 1949 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1949 if (!net->ct.stat) 1950 if (!net->ct.stat)
1950 goto err_pcpu_lists; 1951 goto err_pcpu_lists;
1951 1952
1952 ret = nf_conntrack_expect_pernet_init(net); 1953 ret = nf_conntrack_expect_pernet_init(net);
1953 if (ret < 0) 1954 if (ret < 0)
1954 goto err_expect; 1955 goto err_expect;
1955 ret = nf_conntrack_acct_pernet_init(net); 1956 ret = nf_conntrack_acct_pernet_init(net);
1956 if (ret < 0) 1957 if (ret < 0)
1957 goto err_acct; 1958 goto err_acct;
1958 ret = nf_conntrack_tstamp_pernet_init(net); 1959 ret = nf_conntrack_tstamp_pernet_init(net);
1959 if (ret < 0) 1960 if (ret < 0)
1960 goto err_tstamp; 1961 goto err_tstamp;
1961 ret = nf_conntrack_ecache_pernet_init(net); 1962 ret = nf_conntrack_ecache_pernet_init(net);
1962 if (ret < 0) 1963 if (ret < 0)
1963 goto err_ecache; 1964 goto err_ecache;
1964 ret = nf_conntrack_helper_pernet_init(net); 1965 ret = nf_conntrack_helper_pernet_init(net);
1965 if (ret < 0) 1966 if (ret < 0)
1966 goto err_helper; 1967 goto err_helper;
1967 ret = nf_conntrack_proto_pernet_init(net); 1968 ret = nf_conntrack_proto_pernet_init(net);
1968 if (ret < 0) 1969 if (ret < 0)
1969 goto err_proto; 1970 goto err_proto;
1970 return 0; 1971 return 0;
1971 1972
1972 err_proto: 1973 err_proto:
1973 nf_conntrack_helper_pernet_fini(net); 1974 nf_conntrack_helper_pernet_fini(net);
1974 err_helper: 1975 err_helper:
1975 nf_conntrack_ecache_pernet_fini(net); 1976 nf_conntrack_ecache_pernet_fini(net);
1976 err_ecache: 1977 err_ecache:
1977 nf_conntrack_tstamp_pernet_fini(net); 1978 nf_conntrack_tstamp_pernet_fini(net);
1978 err_tstamp: 1979 err_tstamp:
1979 nf_conntrack_acct_pernet_fini(net); 1980 nf_conntrack_acct_pernet_fini(net);
1980 err_acct: 1981 err_acct:
1981 nf_conntrack_expect_pernet_fini(net); 1982 nf_conntrack_expect_pernet_fini(net);
1982 err_expect: 1983 err_expect:
1983 free_percpu(net->ct.stat); 1984 free_percpu(net->ct.stat);
1984 err_pcpu_lists: 1985 err_pcpu_lists:
1985 free_percpu(net->ct.pcpu_lists); 1986 free_percpu(net->ct.pcpu_lists);
1986 err_stat: 1987 err_stat:
1987 return ret; 1988 return ret;
1988 } 1989 }
1989 1990
net/openvswitch/conntrack.c
1 /* 1 /*
2 * Copyright (c) 2015 Nicira, Inc. 2 * Copyright (c) 2015 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation. 6 * License as published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope that it will be useful, but 8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of 9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details. 11 * General Public License for more details.
12 */ 12 */
13 13
14 #include <linux/module.h> 14 #include <linux/module.h>
15 #include <linux/openvswitch.h> 15 #include <linux/openvswitch.h>
16 #include <linux/tcp.h> 16 #include <linux/tcp.h>
17 #include <linux/udp.h> 17 #include <linux/udp.h>
18 #include <linux/sctp.h> 18 #include <linux/sctp.h>
19 #include <net/ip.h> 19 #include <net/ip.h>
20 #include <net/netfilter/nf_conntrack_core.h> 20 #include <net/netfilter/nf_conntrack_core.h>
21 #include <net/netfilter/nf_conntrack_helper.h> 21 #include <net/netfilter/nf_conntrack_helper.h>
22 #include <net/netfilter/nf_conntrack_labels.h> 22 #include <net/netfilter/nf_conntrack_labels.h>
23 #include <net/netfilter/nf_conntrack_seqadj.h> 23 #include <net/netfilter/nf_conntrack_seqadj.h>
24 #include <net/netfilter/nf_conntrack_zones.h> 24 #include <net/netfilter/nf_conntrack_zones.h>
25 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 25 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
26 26
27 #ifdef CONFIG_NF_NAT_NEEDED 27 #ifdef CONFIG_NF_NAT_NEEDED
28 #include <linux/netfilter/nf_nat.h> 28 #include <linux/netfilter/nf_nat.h>
29 #include <net/netfilter/nf_nat_core.h> 29 #include <net/netfilter/nf_nat_core.h>
30 #include <net/netfilter/nf_nat_l3proto.h> 30 #include <net/netfilter/nf_nat_l3proto.h>
31 #endif 31 #endif
32 32
33 #include "datapath.h" 33 #include "datapath.h"
34 #include "conntrack.h" 34 #include "conntrack.h"
35 #include "flow.h" 35 #include "flow.h"
36 #include "flow_netlink.h" 36 #include "flow_netlink.h"
37 37
38 struct ovs_ct_len_tbl { 38 struct ovs_ct_len_tbl {
39 int maxlen; 39 int maxlen;
40 int minlen; 40 int minlen;
41 }; 41 };
42 42
43 /* Metadata mark for masked write to conntrack mark */ 43 /* Metadata mark for masked write to conntrack mark */
44 struct md_mark { 44 struct md_mark {
45 u32 value; 45 u32 value;
46 u32 mask; 46 u32 mask;
47 }; 47 };
48 48
49 /* Metadata label for masked write to conntrack label. */ 49 /* Metadata label for masked write to conntrack label. */
50 struct md_labels { 50 struct md_labels {
51 struct ovs_key_ct_labels value; 51 struct ovs_key_ct_labels value;
52 struct ovs_key_ct_labels mask; 52 struct ovs_key_ct_labels mask;
53 }; 53 };
54 54
55 enum ovs_ct_nat { 55 enum ovs_ct_nat {
56 OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ 56 OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */
57 OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ 57 OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
58 OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ 58 OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
59 }; 59 };
60 60
61 /* Conntrack action context for execution. */ 61 /* Conntrack action context for execution. */
62 struct ovs_conntrack_info { 62 struct ovs_conntrack_info {
63 struct nf_conntrack_helper *helper; 63 struct nf_conntrack_helper *helper;
64 struct nf_conntrack_zone zone; 64 struct nf_conntrack_zone zone;
65 struct nf_conn *ct; 65 struct nf_conn *ct;
66 u8 commit : 1; 66 u8 commit : 1;
67 u8 nat : 3; /* enum ovs_ct_nat */ 67 u8 nat : 3; /* enum ovs_ct_nat */
68 u16 family; 68 u16 family;
69 struct md_mark mark; 69 struct md_mark mark;
70 struct md_labels labels; 70 struct md_labels labels;
71 #ifdef CONFIG_NF_NAT_NEEDED 71 #ifdef CONFIG_NF_NAT_NEEDED
72 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ 72 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */
73 #endif 73 #endif
74 }; 74 };
75 75
76 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 76 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
77 77
78 static u16 key_to_nfproto(const struct sw_flow_key *key) 78 static u16 key_to_nfproto(const struct sw_flow_key *key)
79 { 79 {
80 switch (ntohs(key->eth.type)) { 80 switch (ntohs(key->eth.type)) {
81 case ETH_P_IP: 81 case ETH_P_IP:
82 return NFPROTO_IPV4; 82 return NFPROTO_IPV4;
83 case ETH_P_IPV6: 83 case ETH_P_IPV6:
84 return NFPROTO_IPV6; 84 return NFPROTO_IPV6;
85 default: 85 default:
86 return NFPROTO_UNSPEC; 86 return NFPROTO_UNSPEC;
87 } 87 }
88 } 88 }
89 89
90 /* Map SKB connection state into the values used by flow definition. */ 90 /* Map SKB connection state into the values used by flow definition. */
91 static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) 91 static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
92 { 92 {
93 u8 ct_state = OVS_CS_F_TRACKED; 93 u8 ct_state = OVS_CS_F_TRACKED;
94 94
95 switch (ctinfo) { 95 switch (ctinfo) {
96 case IP_CT_ESTABLISHED_REPLY: 96 case IP_CT_ESTABLISHED_REPLY:
97 case IP_CT_RELATED_REPLY: 97 case IP_CT_RELATED_REPLY:
98 ct_state |= OVS_CS_F_REPLY_DIR; 98 ct_state |= OVS_CS_F_REPLY_DIR;
99 break; 99 break;
100 default: 100 default:
101 break; 101 break;
102 } 102 }
103 103
104 switch (ctinfo) { 104 switch (ctinfo) {
105 case IP_CT_ESTABLISHED: 105 case IP_CT_ESTABLISHED:
106 case IP_CT_ESTABLISHED_REPLY: 106 case IP_CT_ESTABLISHED_REPLY:
107 ct_state |= OVS_CS_F_ESTABLISHED; 107 ct_state |= OVS_CS_F_ESTABLISHED;
108 break; 108 break;
109 case IP_CT_RELATED: 109 case IP_CT_RELATED:
110 case IP_CT_RELATED_REPLY: 110 case IP_CT_RELATED_REPLY:
111 ct_state |= OVS_CS_F_RELATED; 111 ct_state |= OVS_CS_F_RELATED;
112 break; 112 break;
113 case IP_CT_NEW: 113 case IP_CT_NEW:
114 ct_state |= OVS_CS_F_NEW; 114 ct_state |= OVS_CS_F_NEW;
115 break; 115 break;
116 default: 116 default:
117 break; 117 break;
118 } 118 }
119 119
120 return ct_state; 120 return ct_state;
121 } 121 }
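
ovs_ct_get_state() folds the conntrack info into a small bitmask: reply-direction values add the reply flag, then the established/related/new category adds its own bit on top of the tracked flag. A compact sketch of that mapping; the enum values and bit assignments below are illustrative, not the UAPI constants:

#include <stdio.h>

/* Illustrative stand-ins for enum ip_conntrack_info and OVS_CS_F_*. */
enum cinfo { EST, REL, NEW, EST_REPLY, REL_REPLY };

#define CS_NEW		0x01
#define CS_ESTABLISHED	0x02
#define CS_RELATED	0x04
#define CS_REPLY_DIR	0x08
#define CS_TRACKED	0x20

static unsigned char ct_state(enum cinfo ci)
{
	unsigned char st = CS_TRACKED;

	/* Reply-direction values set the reply flag... */
	if (ci == EST_REPLY || ci == REL_REPLY)
		st |= CS_REPLY_DIR;
	/* ...and each category sets its own bit on top. */
	if (ci == EST || ci == EST_REPLY)
		st |= CS_ESTABLISHED;
	else if (ci == REL || ci == REL_REPLY)
		st |= CS_RELATED;
	else if (ci == NEW)
		st |= CS_NEW;
	return st;
}

int main(void)
{
	printf("established reply -> 0x%02x\n", ct_state(EST_REPLY));
	printf("new               -> 0x%02x\n", ct_state(NEW));
	return 0;
}
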
122 122
123 static u32 ovs_ct_get_mark(const struct nf_conn *ct) 123 static u32 ovs_ct_get_mark(const struct nf_conn *ct)
124 { 124 {
125 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 125 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
126 return ct ? ct->mark : 0; 126 return ct ? ct->mark : 0;
127 #else 127 #else
128 return 0; 128 return 0;
129 #endif 129 #endif
130 } 130 }
131 131
132 static void ovs_ct_get_labels(const struct nf_conn *ct, 132 static void ovs_ct_get_labels(const struct nf_conn *ct,
133 struct ovs_key_ct_labels *labels) 133 struct ovs_key_ct_labels *labels)
134 { 134 {
135 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; 135 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
136 136
137 if (cl) { 137 if (cl) {
138 size_t len = sizeof(cl->bits); 138 size_t len = sizeof(cl->bits);
139 139
140 if (len > OVS_CT_LABELS_LEN) 140 if (len > OVS_CT_LABELS_LEN)
141 len = OVS_CT_LABELS_LEN; 141 len = OVS_CT_LABELS_LEN;
142 else if (len < OVS_CT_LABELS_LEN) 142 else if (len < OVS_CT_LABELS_LEN)
143 memset(labels, 0, OVS_CT_LABELS_LEN); 143 memset(labels, 0, OVS_CT_LABELS_LEN);
144 memcpy(labels, cl->bits, len); 144 memcpy(labels, cl->bits, len);
145 } else { 145 } else {
146 memset(labels, 0, OVS_CT_LABELS_LEN); 146 memset(labels, 0, OVS_CT_LABELS_LEN);
147 } 147 }
148 } 148 }
149 149
150 static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, 150 static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
151 const struct nf_conntrack_zone *zone, 151 const struct nf_conntrack_zone *zone,
152 const struct nf_conn *ct) 152 const struct nf_conn *ct)
153 { 153 {
154 key->ct.state = state; 154 key->ct.state = state;
155 key->ct.zone = zone->id; 155 key->ct.zone = zone->id;
156 key->ct.mark = ovs_ct_get_mark(ct); 156 key->ct.mark = ovs_ct_get_mark(ct);
157 ovs_ct_get_labels(ct, &key->ct.labels); 157 ovs_ct_get_labels(ct, &key->ct.labels);
158 } 158 }
159 159
160 /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has 160 /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
161 * previously sent the packet to conntrack via the ct action. If 161 * previously sent the packet to conntrack via the ct action. If
162 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are 163 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are
163 * initialized from the connection status. 163 * initialized from the connection status.
164 */ 164 */
165 static void ovs_ct_update_key(const struct sk_buff *skb, 165 static void ovs_ct_update_key(const struct sk_buff *skb,
166 const struct ovs_conntrack_info *info, 166 const struct ovs_conntrack_info *info,
167 struct sw_flow_key *key, bool post_ct, 167 struct sw_flow_key *key, bool post_ct,
168 bool keep_nat_flags) 168 bool keep_nat_flags)
169 { 169 {
170 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 170 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
171 enum ip_conntrack_info ctinfo; 171 enum ip_conntrack_info ctinfo;
172 struct nf_conn *ct; 172 struct nf_conn *ct;
173 u8 state = 0; 173 u8 state = 0;
174 174
175 ct = nf_ct_get(skb, &ctinfo); 175 ct = nf_ct_get(skb, &ctinfo);
176 if (ct) { 176 if (ct) {
177 state = ovs_ct_get_state(ctinfo); 177 state = ovs_ct_get_state(ctinfo);
178 /* All unconfirmed entries are NEW connections. */ 178 /* All unconfirmed entries are NEW connections. */
179 if (!nf_ct_is_confirmed(ct)) 179 if (!nf_ct_is_confirmed(ct))
180 state |= OVS_CS_F_NEW; 180 state |= OVS_CS_F_NEW;
181 /* OVS persists the related flag for the duration of the 181 /* OVS persists the related flag for the duration of the
182 * connection. 182 * connection.
183 */ 183 */
184 if (ct->master) 184 if (ct->master)
185 state |= OVS_CS_F_RELATED; 185 state |= OVS_CS_F_RELATED;
186 if (keep_nat_flags) { 186 if (keep_nat_flags) {
187 state |= key->ct.state & OVS_CS_F_NAT_MASK; 187 state |= key->ct.state & OVS_CS_F_NAT_MASK;
188 } else { 188 } else {
189 if (ct->status & IPS_SRC_NAT) 189 if (ct->status & IPS_SRC_NAT)
190 state |= OVS_CS_F_SRC_NAT; 190 state |= OVS_CS_F_SRC_NAT;
191 if (ct->status & IPS_DST_NAT) 191 if (ct->status & IPS_DST_NAT)
192 state |= OVS_CS_F_DST_NAT; 192 state |= OVS_CS_F_DST_NAT;
193 } 193 }
194 zone = nf_ct_zone(ct); 194 zone = nf_ct_zone(ct);
195 } else if (post_ct) { 195 } else if (post_ct) {
196 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 196 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
197 if (info) 197 if (info)
198 zone = &info->zone; 198 zone = &info->zone;
199 } 199 }
200 __ovs_ct_update_key(key, state, zone, ct); 200 __ovs_ct_update_key(key, state, zone, ct);
201 } 201 }
202 202
203 /* This is called to initialize CT key fields possibly coming in from the local 203 /* This is called to initialize CT key fields possibly coming in from the local
204 * stack. 204 * stack.
205 */ 205 */
206 void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) 206 void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
207 { 207 {
208 ovs_ct_update_key(skb, NULL, key, false, false); 208 ovs_ct_update_key(skb, NULL, key, false, false);
209 } 209 }
210 210
211 int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) 211 int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
212 { 212 {
213 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state)) 213 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
214 return -EMSGSIZE; 214 return -EMSGSIZE;
215 215
216 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 216 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
217 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone)) 217 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
218 return -EMSGSIZE; 218 return -EMSGSIZE;
219 219
220 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 220 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
221 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark)) 221 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
222 return -EMSGSIZE; 222 return -EMSGSIZE;
223 223
224 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 224 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
225 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels), 225 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
226 &key->ct.labels)) 226 &key->ct.labels))
227 return -EMSGSIZE; 227 return -EMSGSIZE;
228 228
229 return 0; 229 return 0;
230 } 230 }
231 231
232 static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, 232 static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
233 u32 ct_mark, u32 mask) 233 u32 ct_mark, u32 mask)
234 { 234 {
235 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) 235 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
236 enum ip_conntrack_info ctinfo; 236 enum ip_conntrack_info ctinfo;
237 struct nf_conn *ct; 237 struct nf_conn *ct;
238 u32 new_mark; 238 u32 new_mark;
239 239
240 /* The connection could be invalid, in which case set_mark is a no-op. */ 240 /* The connection could be invalid, in which case set_mark is a no-op. */
241 ct = nf_ct_get(skb, &ctinfo); 241 ct = nf_ct_get(skb, &ctinfo);
242 if (!ct) 242 if (!ct)
243 return 0; 243 return 0;
244 244
245 new_mark = ct_mark | (ct->mark & ~(mask)); 245 new_mark = ct_mark | (ct->mark & ~(mask));
246 if (ct->mark != new_mark) { 246 if (ct->mark != new_mark) {
247 ct->mark = new_mark; 247 ct->mark = new_mark;
248 nf_conntrack_event_cache(IPCT_MARK, ct); 248 nf_conntrack_event_cache(IPCT_MARK, ct);
249 key->ct.mark = new_mark; 249 key->ct.mark = new_mark;
250 } 250 }
251 251
252 return 0; 252 return 0;
253 #else 253 #else
254 return -ENOTSUPP; 254 return -ENOTSUPP;
255 #endif 255 #endif
256 } 256 }
257 257
258 static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, 258 static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
259 const struct ovs_key_ct_labels *labels, 259 const struct ovs_key_ct_labels *labels,
260 const struct ovs_key_ct_labels *mask) 260 const struct ovs_key_ct_labels *mask)
261 { 261 {
262 enum ip_conntrack_info ctinfo; 262 enum ip_conntrack_info ctinfo;
263 struct nf_conn_labels *cl; 263 struct nf_conn_labels *cl;
264 struct nf_conn *ct; 264 struct nf_conn *ct;
265 int err; 265 int err;
266 266
267 /* The connection could be invalid, in which case set_label is a no-op. */ 267 /* The connection could be invalid, in which case set_label is a no-op. */
268 ct = nf_ct_get(skb, &ctinfo); 268 ct = nf_ct_get(skb, &ctinfo);
269 if (!ct) 269 if (!ct)
270 return 0; 270 return 0;
271 271
272 cl = nf_ct_labels_find(ct); 272 cl = nf_ct_labels_find(ct);
273 if (!cl) { 273 if (!cl) {
274 nf_ct_labels_ext_add(ct); 274 nf_ct_labels_ext_add(ct);
275 cl = nf_ct_labels_find(ct); 275 cl = nf_ct_labels_find(ct);
276 } 276 }
277 if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) 277 if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN)
278 return -ENOSPC; 278 return -ENOSPC;
279 279
280 err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, 280 err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
281 OVS_CT_LABELS_LEN / sizeof(u32)); 281 OVS_CT_LABELS_LEN / sizeof(u32));
282 if (err) 282 if (err)
283 return err; 283 return err;
284 284
285 ovs_ct_get_labels(ct, &key->ct.labels); 285 ovs_ct_get_labels(ct, &key->ct.labels);
286 return 0; 286 return 0;
287 } 287 }
288 288
289 /* 'skb' should already be pulled to nh_ofs. */ 289 /* 'skb' should already be pulled to nh_ofs. */
290 static int ovs_ct_helper(struct sk_buff *skb, u16 proto) 290 static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
291 { 291 {
292 const struct nf_conntrack_helper *helper; 292 const struct nf_conntrack_helper *helper;
293 const struct nf_conn_help *help; 293 const struct nf_conn_help *help;
294 enum ip_conntrack_info ctinfo; 294 enum ip_conntrack_info ctinfo;
295 unsigned int protoff; 295 unsigned int protoff;
296 struct nf_conn *ct; 296 struct nf_conn *ct;
297 int err; 297 int err;
298 298
299 ct = nf_ct_get(skb, &ctinfo); 299 ct = nf_ct_get(skb, &ctinfo);
300 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 300 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
301 return NF_ACCEPT; 301 return NF_ACCEPT;
302 302
303 help = nfct_help(ct); 303 help = nfct_help(ct);
304 if (!help) 304 if (!help)
305 return NF_ACCEPT; 305 return NF_ACCEPT;
306 306
307 helper = rcu_dereference(help->helper); 307 helper = rcu_dereference(help->helper);
308 if (!helper) 308 if (!helper)
309 return NF_ACCEPT; 309 return NF_ACCEPT;
310 310
311 switch (proto) { 311 switch (proto) {
312 case NFPROTO_IPV4: 312 case NFPROTO_IPV4:
313 protoff = ip_hdrlen(skb); 313 protoff = ip_hdrlen(skb);
314 break; 314 break;
315 case NFPROTO_IPV6: { 315 case NFPROTO_IPV6: {
316 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 316 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
317 __be16 frag_off; 317 __be16 frag_off;
318 int ofs; 318 int ofs;
319 319
320 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 320 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
321 &frag_off); 321 &frag_off);
322 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) { 322 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
323 pr_debug("proto header not found\n"); 323 pr_debug("proto header not found\n");
324 return NF_ACCEPT; 324 return NF_ACCEPT;
325 } 325 }
326 protoff = ofs; 326 protoff = ofs;
327 break; 327 break;
328 } 328 }
329 default: 329 default:
330 WARN_ONCE(1, "helper invoked on non-IP family!"); 330 WARN_ONCE(1, "helper invoked on non-IP family!");
331 return NF_DROP; 331 return NF_DROP;
332 } 332 }
333 333
334 err = helper->help(skb, protoff, ct, ctinfo); 334 err = helper->help(skb, protoff, ct, ctinfo);
335 if (err != NF_ACCEPT) 335 if (err != NF_ACCEPT)
336 return err; 336 return err;
337 337
338 /* Adjust seqs after helper. This is needed due to some helpers (e.g., 338 /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
339 * FTP with NAT) adjusting the TCP payload size when mangling IP 340 * FTP with NAT) adjusting the TCP payload size when mangling IP
340 * addresses and/or port numbers in the text-based control connection. 340 * addresses and/or port numbers in the text-based control connection.
341 */ 341 */
342 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 342 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
343 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) 343 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
344 return NF_DROP; 344 return NF_DROP;
345 return NF_ACCEPT; 345 return NF_ACCEPT;
346 } 346 }
347 347
348 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 348 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
349 * value if 'skb' is freed. 349 * value if 'skb' is freed.
350 */ 350 */
351 static int handle_fragments(struct net *net, struct sw_flow_key *key, 351 static int handle_fragments(struct net *net, struct sw_flow_key *key,
352 u16 zone, struct sk_buff *skb) 352 u16 zone, struct sk_buff *skb)
353 { 353 {
354 struct ovs_skb_cb ovs_cb = *OVS_CB(skb); 354 struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
355 int err; 355 int err;
356 356
357 if (key->eth.type == htons(ETH_P_IP)) { 357 if (key->eth.type == htons(ETH_P_IP)) {
358 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; 358 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
359 359
360 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 360 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
361 err = ip_defrag(net, skb, user); 361 err = ip_defrag(net, skb, user);
362 if (err) 362 if (err)
363 return err; 363 return err;
364 364
365 ovs_cb.mru = IPCB(skb)->frag_max_size; 365 ovs_cb.mru = IPCB(skb)->frag_max_size;
366 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 366 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
367 } else if (key->eth.type == htons(ETH_P_IPV6)) { 367 } else if (key->eth.type == htons(ETH_P_IPV6)) {
368 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; 368 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
369 369
370 skb_orphan(skb); 370 skb_orphan(skb);
371 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 371 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
372 err = nf_ct_frag6_gather(net, skb, user); 372 err = nf_ct_frag6_gather(net, skb, user);
373 if (err) 373 if (err)
374 return err; 374 return err;
375 375
376 key->ip.proto = ipv6_hdr(skb)->nexthdr; 376 key->ip.proto = ipv6_hdr(skb)->nexthdr;
377 ovs_cb.mru = IP6CB(skb)->frag_max_size; 377 ovs_cb.mru = IP6CB(skb)->frag_max_size;
378 #endif 378 #endif
379 } else { 379 } else {
380 kfree_skb(skb); 380 kfree_skb(skb);
381 return -EPFNOSUPPORT; 381 return -EPFNOSUPPORT;
382 } 382 }
383 383
384 key->ip.frag = OVS_FRAG_TYPE_NONE; 384 key->ip.frag = OVS_FRAG_TYPE_NONE;
385 skb_clear_hash(skb); 385 skb_clear_hash(skb);
386 skb->ignore_df = 1; 386 skb->ignore_df = 1;
387 *OVS_CB(skb) = ovs_cb; 387 *OVS_CB(skb) = ovs_cb;
388 388
389 return 0; 389 return 0;
390 } 390 }
391 391
392 static struct nf_conntrack_expect * 392 static struct nf_conntrack_expect *
393 ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, 393 ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
394 u16 proto, const struct sk_buff *skb) 394 u16 proto, const struct sk_buff *skb)
395 { 395 {
396 struct nf_conntrack_tuple tuple; 396 struct nf_conntrack_tuple tuple;
397 397
398 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) 398 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
399 return NULL; 399 return NULL;
400 return __nf_ct_expect_find(net, zone, &tuple); 400 return __nf_ct_expect_find(net, zone, &tuple);
401 } 401 }
402 402
403 /* This replicates logic from nf_conntrack_core.c that is not exported. */ 403 /* This replicates logic from nf_conntrack_core.c that is not exported. */
404 static enum ip_conntrack_info 404 static enum ip_conntrack_info
405 ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) 405 ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
406 { 406 {
407 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 407 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
408 408
409 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) 409 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
410 return IP_CT_ESTABLISHED_REPLY; 410 return IP_CT_ESTABLISHED_REPLY;
411 /* Once we've had two way comms, always ESTABLISHED. */ 411 /* Once we've had two way comms, always ESTABLISHED. */
412 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 412 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
413 return IP_CT_ESTABLISHED; 413 return IP_CT_ESTABLISHED;
414 if (test_bit(IPS_EXPECTED_BIT, &ct->status)) 414 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
415 return IP_CT_RELATED; 415 return IP_CT_RELATED;
416 return IP_CT_NEW; 416 return IP_CT_NEW;
417 } 417 }
418 418
419 /* Find an existing connection which this packet belongs to without 419 /* Find an existing connection which this packet belongs to without
420 * re-attributing statistics or modifying the connection state. This allows an 420 * re-attributing statistics or modifying the connection state. This allows an
421 * skb->nfct lost due to an upcall to be recovered during actions execution. 421 * skb->nfct lost due to an upcall to be recovered during actions execution.
422 * 422 *
423 * Must be called with rcu_read_lock. 423 * Must be called with rcu_read_lock.
424 * 424 *
425 * On success, populates skb->nfct and skb->nfctinfo, and returns the 425 * On success, populates skb->nfct and skb->nfctinfo, and returns the
426 * connection. Returns NULL if there is no existing entry. 426 * connection. Returns NULL if there is no existing entry.
427 */ 427 */
428 static struct nf_conn * 428 static struct nf_conn *
429 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 429 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
430 u8 l3num, struct sk_buff *skb) 430 u8 l3num, struct sk_buff *skb)
431 { 431 {
432 struct nf_conntrack_l3proto *l3proto; 432 struct nf_conntrack_l3proto *l3proto;
433 struct nf_conntrack_l4proto *l4proto; 433 struct nf_conntrack_l4proto *l4proto;
434 struct nf_conntrack_tuple tuple; 434 struct nf_conntrack_tuple tuple;
435 struct nf_conntrack_tuple_hash *h; 435 struct nf_conntrack_tuple_hash *h;
436 struct nf_conn *ct; 436 struct nf_conn *ct;
437 unsigned int dataoff; 437 unsigned int dataoff;
438 u8 protonum; 438 u8 protonum;
439 439
440 l3proto = __nf_ct_l3proto_find(l3num); 440 l3proto = __nf_ct_l3proto_find(l3num);
441 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 441 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
442 &protonum) <= 0) { 442 &protonum) <= 0) {
443 pr_debug("ovs_ct_find_existing: Can't get protonum\n"); 443 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
444 return NULL; 444 return NULL;
445 } 445 }
446 l4proto = __nf_ct_l4proto_find(l3num, protonum); 446 l4proto = __nf_ct_l4proto_find(l3num, protonum);
447 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 447 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
448 protonum, net, &tuple, l3proto, l4proto)) { 448 protonum, net, &tuple, l3proto, l4proto)) {
449 pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 449 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
450 return NULL; 450 return NULL;
451 } 451 }
452 452
453 /* look for tuple match */ 453 /* look for tuple match */
454 h = nf_conntrack_find_get(net, zone, &tuple); 454 h = nf_conntrack_find_get(net, zone, &tuple);
455 if (!h) 455 if (!h)
456 return NULL; /* Not found. */ 456 return NULL; /* Not found. */
457 457
458 ct = nf_ct_tuplehash_to_ctrack(h); 458 ct = nf_ct_tuplehash_to_ctrack(h);
459 459
460 skb->nfct = &ct->ct_general; 460 skb->nfct = &ct->ct_general;
461 skb->nfctinfo = ovs_ct_get_info(h); 461 skb->nfctinfo = ovs_ct_get_info(h);
462 return ct; 462 return ct;
463 } 463 }
464 464
465 /* Determine whether skb->nfct is equal to the result of conntrack lookup. */ 465 /* Determine whether skb->nfct is equal to the result of conntrack lookup. */
466 static bool skb_nfct_cached(struct net *net, 466 static bool skb_nfct_cached(struct net *net,
467 const struct sw_flow_key *key, 467 const struct sw_flow_key *key,
468 const struct ovs_conntrack_info *info, 468 const struct ovs_conntrack_info *info,
469 struct sk_buff *skb) 469 struct sk_buff *skb)
470 { 470 {
471 enum ip_conntrack_info ctinfo; 471 enum ip_conntrack_info ctinfo;
472 struct nf_conn *ct; 472 struct nf_conn *ct;
473 473
474 ct = nf_ct_get(skb, &ctinfo); 474 ct = nf_ct_get(skb, &ctinfo);
475 /* If no ct, check if we have evidence that an existing conntrack entry 475 /* If no ct, check if we have evidence that an existing conntrack entry
476 * might be found for this skb. This happens when we lose a skb->nfct 476 * might be found for this skb. This happens when we lose a skb->nfct
477 * due to an upcall. If the connection was not confirmed, it is not 477 * due to an upcall. If the connection was not confirmed, it is not
478 * cached and needs to be run through conntrack again. 478 * cached and needs to be run through conntrack again.
479 */ 479 */
480 if (!ct && key->ct.state & OVS_CS_F_TRACKED && 480 if (!ct && key->ct.state & OVS_CS_F_TRACKED &&
481 !(key->ct.state & OVS_CS_F_INVALID) && 481 !(key->ct.state & OVS_CS_F_INVALID) &&
482 key->ct.zone == info->zone.id) 482 key->ct.zone == info->zone.id)
483 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb); 483 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb);
484 if (!ct) 484 if (!ct)
485 return false; 485 return false;
486 if (!net_eq(net, read_pnet(&ct->ct_net))) 486 if (!net_eq(net, read_pnet(&ct->ct_net)))
487 return false; 487 return false;
488 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct))) 488 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
489 return false; 489 return false;
490 if (info->helper) { 490 if (info->helper) {
491 struct nf_conn_help *help; 491 struct nf_conn_help *help;
492 492
493 help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); 493 help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
494 if (help && rcu_access_pointer(help->helper) != info->helper) 494 if (help && rcu_access_pointer(help->helper) != info->helper)
495 return false; 495 return false;
496 } 496 }
497 497
498 return true; 498 return true;
499 } 499 }
500 500
501 #ifdef CONFIG_NF_NAT_NEEDED 501 #ifdef CONFIG_NF_NAT_NEEDED
502 /* Modelled after nf_nat_ipv[46]_fn(). 502 /* Modelled after nf_nat_ipv[46]_fn().
503 * range is only used for new, uninitialized NAT state. 503 * range is only used for new, uninitialized NAT state.
504 * Returns either NF_ACCEPT or NF_DROP. 504 * Returns either NF_ACCEPT or NF_DROP.
505 */ 505 */
506 static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, 506 static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
507 enum ip_conntrack_info ctinfo, 507 enum ip_conntrack_info ctinfo,
508 const struct nf_nat_range *range, 508 const struct nf_nat_range *range,
509 enum nf_nat_manip_type maniptype) 509 enum nf_nat_manip_type maniptype)
510 { 510 {
511 int hooknum, nh_off, err = NF_ACCEPT; 511 int hooknum, nh_off, err = NF_ACCEPT;
512 512
513 nh_off = skb_network_offset(skb); 513 nh_off = skb_network_offset(skb);
514 skb_pull(skb, nh_off); 514 skb_pull(skb, nh_off);
515 515
516 /* See HOOK2MANIP(). */ 516 /* See HOOK2MANIP(). */
517 if (maniptype == NF_NAT_MANIP_SRC) 517 if (maniptype == NF_NAT_MANIP_SRC)
518 hooknum = NF_INET_LOCAL_IN; /* Source NAT */ 518 hooknum = NF_INET_LOCAL_IN; /* Source NAT */
519 else 519 else
520 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ 520 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
521 521
522 switch (ctinfo) { 522 switch (ctinfo) {
523 case IP_CT_RELATED: 523 case IP_CT_RELATED:
524 case IP_CT_RELATED_REPLY: 524 case IP_CT_RELATED_REPLY:
525 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 525 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
526 skb->protocol == htons(ETH_P_IP) && 526 skb->protocol == htons(ETH_P_IP) &&
527 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 527 ip_hdr(skb)->protocol == IPPROTO_ICMP) {
528 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 528 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
529 hooknum)) 529 hooknum))
530 err = NF_DROP; 530 err = NF_DROP;
531 goto push; 531 goto push;
532 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 532 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
533 skb->protocol == htons(ETH_P_IPV6)) { 533 skb->protocol == htons(ETH_P_IPV6)) {
534 __be16 frag_off; 534 __be16 frag_off;
535 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 535 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
536 int hdrlen = ipv6_skip_exthdr(skb, 536 int hdrlen = ipv6_skip_exthdr(skb,
537 sizeof(struct ipv6hdr), 537 sizeof(struct ipv6hdr),
538 &nexthdr, &frag_off); 538 &nexthdr, &frag_off);
539 539
540 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 540 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
541 if (!nf_nat_icmpv6_reply_translation(skb, ct, 541 if (!nf_nat_icmpv6_reply_translation(skb, ct,
542 ctinfo, 542 ctinfo,
543 hooknum, 543 hooknum,
544 hdrlen)) 544 hdrlen))
545 err = NF_DROP; 545 err = NF_DROP;
546 goto push; 546 goto push;
547 } 547 }
548 } 548 }
549 /* Non-ICMP, fall thru to initialize if needed. */ 549 /* Non-ICMP, fall thru to initialize if needed. */
550 case IP_CT_NEW: 550 case IP_CT_NEW:
551 /* Seen it before? This can happen for loopback, retrans, 551 /* Seen it before? This can happen for loopback, retrans,
552 * or local packets. 552 * or local packets.
553 */ 553 */
554 if (!nf_nat_initialized(ct, maniptype)) { 554 if (!nf_nat_initialized(ct, maniptype)) {
555 /* Initialize according to the NAT action. */ 555 /* Initialize according to the NAT action. */
556 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) 556 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
557 /* Action is set up to establish a new 557 /* Action is set up to establish a new
558 * mapping. 558 * mapping.
559 */ 559 */
560 ? nf_nat_setup_info(ct, range, maniptype) 560 ? nf_nat_setup_info(ct, range, maniptype)
561 : nf_nat_alloc_null_binding(ct, hooknum); 561 : nf_nat_alloc_null_binding(ct, hooknum);
562 if (err != NF_ACCEPT) 562 if (err != NF_ACCEPT)
563 goto push; 563 goto push;
564 } 564 }
565 break; 565 break;
566 566
567 case IP_CT_ESTABLISHED: 567 case IP_CT_ESTABLISHED:
568 case IP_CT_ESTABLISHED_REPLY: 568 case IP_CT_ESTABLISHED_REPLY:
569 break; 569 break;
570 570
571 default: 571 default:
572 err = NF_DROP; 572 err = NF_DROP;
573 goto push; 573 goto push;
574 } 574 }
575 575
576 err = nf_nat_packet(ct, ctinfo, hooknum, skb); 576 err = nf_nat_packet(ct, ctinfo, hooknum, skb);
577 push: 577 push:
578 skb_push(skb, nh_off); 578 skb_push(skb, nh_off);
579 579
580 return err; 580 return err;
581 } 581 }
582 582
583 static void ovs_nat_update_key(struct sw_flow_key *key, 583 static void ovs_nat_update_key(struct sw_flow_key *key,
584 const struct sk_buff *skb, 584 const struct sk_buff *skb,
585 enum nf_nat_manip_type maniptype) 585 enum nf_nat_manip_type maniptype)
586 { 586 {
587 if (maniptype == NF_NAT_MANIP_SRC) { 587 if (maniptype == NF_NAT_MANIP_SRC) {
588 __be16 src; 588 __be16 src;
589 589
590 key->ct.state |= OVS_CS_F_SRC_NAT; 590 key->ct.state |= OVS_CS_F_SRC_NAT;
591 if (key->eth.type == htons(ETH_P_IP)) 591 if (key->eth.type == htons(ETH_P_IP))
592 key->ipv4.addr.src = ip_hdr(skb)->saddr; 592 key->ipv4.addr.src = ip_hdr(skb)->saddr;
593 else if (key->eth.type == htons(ETH_P_IPV6)) 593 else if (key->eth.type == htons(ETH_P_IPV6))
594 memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, 594 memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
595 sizeof(key->ipv6.addr.src)); 595 sizeof(key->ipv6.addr.src));
596 else 596 else
597 return; 597 return;
598 598
599 if (key->ip.proto == IPPROTO_UDP) 599 if (key->ip.proto == IPPROTO_UDP)
600 src = udp_hdr(skb)->source; 600 src = udp_hdr(skb)->source;
601 else if (key->ip.proto == IPPROTO_TCP) 601 else if (key->ip.proto == IPPROTO_TCP)
602 src = tcp_hdr(skb)->source; 602 src = tcp_hdr(skb)->source;
603 else if (key->ip.proto == IPPROTO_SCTP) 603 else if (key->ip.proto == IPPROTO_SCTP)
604 src = sctp_hdr(skb)->source; 604 src = sctp_hdr(skb)->source;
605 else 605 else
606 return; 606 return;
607 607
608 key->tp.src = src; 608 key->tp.src = src;
609 } else { 609 } else {
610 __be16 dst; 610 __be16 dst;
611 611
612 key->ct.state |= OVS_CS_F_DST_NAT; 612 key->ct.state |= OVS_CS_F_DST_NAT;
613 if (key->eth.type == htons(ETH_P_IP)) 613 if (key->eth.type == htons(ETH_P_IP))
614 key->ipv4.addr.dst = ip_hdr(skb)->daddr; 614 key->ipv4.addr.dst = ip_hdr(skb)->daddr;
615 else if (key->eth.type == htons(ETH_P_IPV6)) 615 else if (key->eth.type == htons(ETH_P_IPV6))
616 memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, 616 memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
617 sizeof(key->ipv6.addr.dst)); 617 sizeof(key->ipv6.addr.dst));
618 else 618 else
619 return; 619 return;
620 620
621 if (key->ip.proto == IPPROTO_UDP) 621 if (key->ip.proto == IPPROTO_UDP)
622 dst = udp_hdr(skb)->dest; 622 dst = udp_hdr(skb)->dest;
623 else if (key->ip.proto == IPPROTO_TCP) 623 else if (key->ip.proto == IPPROTO_TCP)
624 dst = tcp_hdr(skb)->dest; 624 dst = tcp_hdr(skb)->dest;
625 else if (key->ip.proto == IPPROTO_SCTP) 625 else if (key->ip.proto == IPPROTO_SCTP)
626 dst = sctp_hdr(skb)->dest; 626 dst = sctp_hdr(skb)->dest;
627 else 627 else
628 return; 628 return;
629 629
630 key->tp.dst = dst; 630 key->tp.dst = dst;
631 } 631 }
632 } 632 }
633 633
634 /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ 634 /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
635 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 635 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
636 const struct ovs_conntrack_info *info, 636 const struct ovs_conntrack_info *info,
637 struct sk_buff *skb, struct nf_conn *ct, 637 struct sk_buff *skb, struct nf_conn *ct,
638 enum ip_conntrack_info ctinfo) 638 enum ip_conntrack_info ctinfo)
639 { 639 {
640 enum nf_nat_manip_type maniptype; 640 enum nf_nat_manip_type maniptype;
641 int err; 641 int err;
642 642
643 if (nf_ct_is_untracked(ct)) { 643 if (nf_ct_is_untracked(ct)) {
644 /* A NAT action may only be performed on tracked packets. */ 644 /* A NAT action may only be performed on tracked packets. */
645 return NF_ACCEPT; 645 return NF_ACCEPT;
646 } 646 }
647 647
648 /* Add NAT extension if not confirmed yet. */ 648 /* Add NAT extension if not confirmed yet. */
649 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) 649 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
650 return NF_ACCEPT; /* Can't NAT. */ 650 return NF_ACCEPT; /* Can't NAT. */
651 651
652 /* Determine NAT type. 652 /* Determine NAT type.
653 * Check if the NAT type can be deduced from the tracked connection. 653 * Check if the NAT type can be deduced from the tracked connection.
654 * Make sure new expected connections (IP_CT_RELATED) are NATted only 654 * Make sure new expected connections (IP_CT_RELATED) are NATted only
655 * when committing. 655 * when committing.
656 */ 656 */
657 if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && 657 if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
658 ct->status & IPS_NAT_MASK && 658 ct->status & IPS_NAT_MASK &&
659 (ctinfo != IP_CT_RELATED || info->commit)) { 659 (ctinfo != IP_CT_RELATED || info->commit)) {
660 /* NAT an established or related connection like before. */ 660 /* NAT an established or related connection like before. */
661 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 661 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
662 /* This is the REPLY direction for a connection 662 /* This is the REPLY direction for a connection
663 * for which NAT was applied in the forward 663 * for which NAT was applied in the forward
664 * direction. Do the reverse NAT. 664 * direction. Do the reverse NAT.
665 */ 665 */
666 maniptype = ct->status & IPS_SRC_NAT 666 maniptype = ct->status & IPS_SRC_NAT
667 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; 667 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
668 else 668 else
669 maniptype = ct->status & IPS_SRC_NAT 669 maniptype = ct->status & IPS_SRC_NAT
670 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; 670 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
671 } else if (info->nat & OVS_CT_SRC_NAT) { 671 } else if (info->nat & OVS_CT_SRC_NAT) {
672 maniptype = NF_NAT_MANIP_SRC; 672 maniptype = NF_NAT_MANIP_SRC;
673 } else if (info->nat & OVS_CT_DST_NAT) { 673 } else if (info->nat & OVS_CT_DST_NAT) {
674 maniptype = NF_NAT_MANIP_DST; 674 maniptype = NF_NAT_MANIP_DST;
675 } else { 675 } else {
676 return NF_ACCEPT; /* Connection is not NATed. */ 676 return NF_ACCEPT; /* Connection is not NATed. */
677 } 677 }
678 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); 678 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
679 679
680 /* Mark NAT done if successful and update the flow key. */ 680 /* Mark NAT done if successful and update the flow key. */
681 if (err == NF_ACCEPT) 681 if (err == NF_ACCEPT)
682 ovs_nat_update_key(key, skb, maniptype); 682 ovs_nat_update_key(key, skb, maniptype);
683 683
684 return err; 684 return err;
685 } 685 }
686 #else /* !CONFIG_NF_NAT_NEEDED */ 686 #else /* !CONFIG_NF_NAT_NEEDED */
687 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, 687 static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
688 const struct ovs_conntrack_info *info, 688 const struct ovs_conntrack_info *info,
689 struct sk_buff *skb, struct nf_conn *ct, 689 struct sk_buff *skb, struct nf_conn *ct,
690 enum ip_conntrack_info ctinfo) 690 enum ip_conntrack_info ctinfo)
691 { 691 {
692 return NF_ACCEPT; 692 return NF_ACCEPT;
693 } 693 }
694 #endif 694 #endif
695 695
696 /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if 696 /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
697 * not done already. Update key with new CT state after passing the packet 697 * not done already. Update key with new CT state after passing the packet
698 * through conntrack. 698 * through conntrack.
699 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be 699 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
700 * set to NULL and 0 will be returned. 700 * set to NULL and 0 will be returned.
701 */ 701 */
702 static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 702 static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
703 const struct ovs_conntrack_info *info, 703 const struct ovs_conntrack_info *info,
704 struct sk_buff *skb) 704 struct sk_buff *skb)
705 { 705 {
706 /* If we are recirculating packets to match on conntrack fields and 706 /* If we are recirculating packets to match on conntrack fields and
707 * committing with a separate conntrack action, then we don't need to 707 * committing with a separate conntrack action, then we don't need to
708 * actually run the packet through conntrack twice unless it's for a 708 * actually run the packet through conntrack twice unless it's for a
709 * different zone. 709 * different zone.
710 */ 710 */
711 bool cached = skb_nfct_cached(net, key, info, skb); 711 bool cached = skb_nfct_cached(net, key, info, skb);
712 enum ip_conntrack_info ctinfo; 712 enum ip_conntrack_info ctinfo;
713 struct nf_conn *ct; 713 struct nf_conn *ct;
714 714
715 if (!cached) { 715 if (!cached) {
716 struct nf_conn *tmpl = info->ct; 716 struct nf_conn *tmpl = info->ct;
717 int err; 717 int err;
718 718
719 /* Associate skb with specified zone. */ 719 /* Associate skb with specified zone. */
720 if (tmpl) { 720 if (tmpl) {
721 if (skb->nfct) 721 if (skb->nfct)
722 nf_conntrack_put(skb->nfct); 722 nf_conntrack_put(skb->nfct);
723 nf_conntrack_get(&tmpl->ct_general); 723 nf_conntrack_get(&tmpl->ct_general);
724 skb->nfct = &tmpl->ct_general; 724 skb->nfct = &tmpl->ct_general;
725 skb->nfctinfo = IP_CT_NEW; 725 skb->nfctinfo = IP_CT_NEW;
726 } 726 }
727 727
728 /* Repeat if requested, see nf_iterate(). */ 728 err = nf_conntrack_in(net, info->family,
729 do { 729 NF_INET_PRE_ROUTING, skb);
730 err = nf_conntrack_in(net, info->family,
731 NF_INET_PRE_ROUTING, skb);
732 } while (err == NF_REPEAT);
733
734 if (err != NF_ACCEPT) 730 if (err != NF_ACCEPT)
735 return -ENOENT; 731 return -ENOENT;
736 732
737 /* Clear CT state NAT flags to mark that we have not yet done 733 /* Clear CT state NAT flags to mark that we have not yet done
738 * NAT after the nf_conntrack_in() call. We can actually clear 734 * NAT after the nf_conntrack_in() call. We can actually clear
739 * the whole state, as it will be re-initialized below. 735 * the whole state, as it will be re-initialized below.
740 */ 736 */
741 key->ct.state = 0; 737 key->ct.state = 0;
742 738
743 /* Update the key, but keep the NAT flags. */ 739 /* Update the key, but keep the NAT flags. */
744 ovs_ct_update_key(skb, info, key, true, true); 740 ovs_ct_update_key(skb, info, key, true, true);
745 } 741 }
746 742
747 ct = nf_ct_get(skb, &ctinfo); 743 ct = nf_ct_get(skb, &ctinfo);
748 if (ct) { 744 if (ct) {
749 /* Packets starting a new connection must be NATted before the 745 /* Packets starting a new connection must be NATted before the
750 * helper, so that the helper knows about the NAT. We enforce 746 * helper, so that the helper knows about the NAT. We enforce
751 * this by delaying both NAT and helper calls for unconfirmed 747 * this by delaying both NAT and helper calls for unconfirmed
752 * connections until the committing CT action. For later 748 * connections until the committing CT action. For later
753 * packets NAT and Helper may be called in either order. 749 * packets NAT and Helper may be called in either order.
754 * 750 *
755 * NAT will be done only if the CT action has NAT, and only 751 * NAT will be done only if the CT action has NAT, and only
756 * once per packet (per zone), as guarded by the NAT bits in 752 * once per packet (per zone), as guarded by the NAT bits in
757 * the key->ct.state. 753 * the key->ct.state.
758 */ 754 */
759 if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && 755 if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) &&
760 (nf_ct_is_confirmed(ct) || info->commit) && 756 (nf_ct_is_confirmed(ct) || info->commit) &&
761 ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { 757 ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
762 return -EINVAL; 758 return -EINVAL;
763 } 759 }
764 760
765 /* Userspace may decide to perform a ct lookup without a helper 761 /* Userspace may decide to perform a ct lookup without a helper
766 * specified followed by a (recirculate and) commit with one. 762 * specified followed by a (recirculate and) commit with one.
767 * Therefore, for unconfirmed connections which we will commit, 763 * Therefore, for unconfirmed connections which we will commit,
768 * we need to attach the helper here. 764 * we need to attach the helper here.
769 */ 765 */
770 if (!nf_ct_is_confirmed(ct) && info->commit && 766 if (!nf_ct_is_confirmed(ct) && info->commit &&
771 info->helper && !nfct_help(ct)) { 767 info->helper && !nfct_help(ct)) {
772 int err = __nf_ct_try_assign_helper(ct, info->ct, 768 int err = __nf_ct_try_assign_helper(ct, info->ct,
773 GFP_ATOMIC); 769 GFP_ATOMIC);
774 if (err) 770 if (err)
775 return err; 771 return err;
776 } 772 }
777 773
778 /* Call the helper only if: 774 /* Call the helper only if:
779 * - nf_conntrack_in() was executed above ("!cached") for a 775 * - nf_conntrack_in() was executed above ("!cached") for a
780 * confirmed connection, or 776 * confirmed connection, or
781 * - When committing an unconfirmed connection. 777 * - When committing an unconfirmed connection.
782 */ 778 */
783 if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) && 779 if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
784 ovs_ct_helper(skb, info->family) != NF_ACCEPT) { 780 ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
785 return -EINVAL; 781 return -EINVAL;
786 } 782 }
787 } 783 }
788 784
789 return 0; 785 return 0;
790 } 786 }
791 787
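The staggered old/new columns above make the single functional change in this hunk of __ovs_ct_lookup() hard to follow. De-interleaved, the before and after call patterns look as follows; this is a readability sketch lifted from the hunk itself, not a compilable unit on its own:

	/* Before: the OVS code kept re-running conntrack for as long as it
	 * returned NF_REPEAT (see the removed "Repeat if requested" comment
	 * and do/while loop in the left-hand column above).
	 */
	do {
		err = nf_conntrack_in(net, info->family,
				      NF_INET_PRE_ROUTING, skb);
	} while (err == NF_REPEAT);
	if (err != NF_ACCEPT)
		return -ENOENT;

	/* After: a single call; any verdict other than NF_ACCEPT is treated
	 * as a lookup failure by this caller.
	 */
	err = nf_conntrack_in(net, info->family,
			      NF_INET_PRE_ROUTING, skb);
	if (err != NF_ACCEPT)
		return -ENOENT;
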
792 /* Lookup connection and read fields into key. */ 788 /* Lookup connection and read fields into key. */
793 static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 789 static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
794 const struct ovs_conntrack_info *info, 790 const struct ovs_conntrack_info *info,
795 struct sk_buff *skb) 791 struct sk_buff *skb)
796 { 792 {
797 struct nf_conntrack_expect *exp; 793 struct nf_conntrack_expect *exp;
798 794
799 /* If we pass an expected packet through nf_conntrack_in() the 795 /* If we pass an expected packet through nf_conntrack_in() the
800 * expectation is typically removed, but the packet could still be 796 * expectation is typically removed, but the packet could still be
801 * lost in upcall processing. To prevent this from happening we 797 * lost in upcall processing. To prevent this from happening we
802 * perform an explicit expectation lookup. Expected connections are 798 * perform an explicit expectation lookup. Expected connections are
803 * always new, and will be passed through conntrack only when they are 799 * always new, and will be passed through conntrack only when they are
804 * committed, as it is OK to remove the expectation at that time. 800 * committed, as it is OK to remove the expectation at that time.
805 */ 801 */
806 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 802 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
807 if (exp) { 803 if (exp) {
808 u8 state; 804 u8 state;
809 805
810 /* NOTE: New connections are NATted and Helped only when 806 /* NOTE: New connections are NATted and Helped only when
811 * committed, so we are not calling into NAT here. 807 * committed, so we are not calling into NAT here.
812 */ 808 */
813 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 809 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
814 __ovs_ct_update_key(key, state, &info->zone, exp->master); 810 __ovs_ct_update_key(key, state, &info->zone, exp->master);
815 } else { 811 } else {
816 struct nf_conn *ct; 812 struct nf_conn *ct;
817 int err; 813 int err;
818 814
819 err = __ovs_ct_lookup(net, key, info, skb); 815 err = __ovs_ct_lookup(net, key, info, skb);
820 if (err) 816 if (err)
821 return err; 817 return err;
822 818
823 ct = (struct nf_conn *)skb->nfct; 819 ct = (struct nf_conn *)skb->nfct;
824 if (ct) 820 if (ct)
825 nf_ct_deliver_cached_events(ct); 821 nf_ct_deliver_cached_events(ct);
826 } 822 }
827 823
828 return 0; 824 return 0;
829 } 825 }
830 826
831 static bool labels_nonzero(const struct ovs_key_ct_labels *labels) 827 static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
832 { 828 {
833 size_t i; 829 size_t i;
834 830
835 for (i = 0; i < sizeof(*labels); i++) 831 for (i = 0; i < sizeof(*labels); i++)
836 if (labels->ct_labels[i]) 832 if (labels->ct_labels[i])
837 return true; 833 return true;
838 834
839 return false; 835 return false;
840 } 836 }
841 837
842 /* Lookup connection and confirm if unconfirmed. */ 838 /* Lookup connection and confirm if unconfirmed. */
843 static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, 839 static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
844 const struct ovs_conntrack_info *info, 840 const struct ovs_conntrack_info *info,
845 struct sk_buff *skb) 841 struct sk_buff *skb)
846 { 842 {
847 int err; 843 int err;
848 844
849 err = __ovs_ct_lookup(net, key, info, skb); 845 err = __ovs_ct_lookup(net, key, info, skb);
850 if (err) 846 if (err)
851 return err; 847 return err;
852 848
853 /* Apply changes before confirming the connection so that the initial 849 /* Apply changes before confirming the connection so that the initial
854 * conntrack NEW netlink event carries the values given in the CT 850 * conntrack NEW netlink event carries the values given in the CT
855 * action. 851 * action.
856 */ 852 */
857 if (info->mark.mask) { 853 if (info->mark.mask) {
858 err = ovs_ct_set_mark(skb, key, info->mark.value, 854 err = ovs_ct_set_mark(skb, key, info->mark.value,
859 info->mark.mask); 855 info->mark.mask);
860 if (err) 856 if (err)
861 return err; 857 return err;
862 } 858 }
863 if (labels_nonzero(&info->labels.mask)) { 859 if (labels_nonzero(&info->labels.mask)) {
864 err = ovs_ct_set_labels(skb, key, &info->labels.value, 860 err = ovs_ct_set_labels(skb, key, &info->labels.value,
865 &info->labels.mask); 861 &info->labels.mask);
866 if (err) 862 if (err)
867 return err; 863 return err;
868 } 864 }
869 /* This will take care of sending queued events even if the connection 865 /* This will take care of sending queued events even if the connection
870 * is already confirmed. 866 * is already confirmed.
871 */ 867 */
872 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 868 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
873 return -EINVAL; 869 return -EINVAL;
874 870
875 return 0; 871 return 0;
876 } 872 }
877 873
878 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 874 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
879 * value if 'skb' is freed. 875 * value if 'skb' is freed.
880 */ 876 */
881 int ovs_ct_execute(struct net *net, struct sk_buff *skb, 877 int ovs_ct_execute(struct net *net, struct sk_buff *skb,
882 struct sw_flow_key *key, 878 struct sw_flow_key *key,
883 const struct ovs_conntrack_info *info) 879 const struct ovs_conntrack_info *info)
884 { 880 {
885 int nh_ofs; 881 int nh_ofs;
886 int err; 882 int err;
887 883
888 /* The conntrack module expects to be working at L3. */ 884 /* The conntrack module expects to be working at L3. */
889 nh_ofs = skb_network_offset(skb); 885 nh_ofs = skb_network_offset(skb);
890 skb_pull(skb, nh_ofs); 886 skb_pull(skb, nh_ofs);
891 887
892 if (key->ip.frag != OVS_FRAG_TYPE_NONE) { 888 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
893 err = handle_fragments(net, key, info->zone.id, skb); 889 err = handle_fragments(net, key, info->zone.id, skb);
894 if (err) 890 if (err)
895 return err; 891 return err;
896 } 892 }
897 893
898 if (info->commit) 894 if (info->commit)
899 err = ovs_ct_commit(net, key, info, skb); 895 err = ovs_ct_commit(net, key, info, skb);
900 else 896 else
901 err = ovs_ct_lookup(net, key, info, skb); 897 err = ovs_ct_lookup(net, key, info, skb);
902 898
903 skb_push(skb, nh_ofs); 899 skb_push(skb, nh_ofs);
904 if (err) 900 if (err)
905 kfree_skb(skb); 901 kfree_skb(skb);
906 return err; 902 return err;
907 } 903 }
908 904
909 static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, 905 static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
910 const struct sw_flow_key *key, bool log) 906 const struct sw_flow_key *key, bool log)
911 { 907 {
912 struct nf_conntrack_helper *helper; 908 struct nf_conntrack_helper *helper;
913 struct nf_conn_help *help; 909 struct nf_conn_help *help;
914 910
915 helper = nf_conntrack_helper_try_module_get(name, info->family, 911 helper = nf_conntrack_helper_try_module_get(name, info->family,
916 key->ip.proto); 912 key->ip.proto);
917 if (!helper) { 913 if (!helper) {
918 OVS_NLERR(log, "Unknown helper \"%s\"", name); 914 OVS_NLERR(log, "Unknown helper \"%s\"", name);
919 return -EINVAL; 915 return -EINVAL;
920 } 916 }
921 917
922 help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); 918 help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
923 if (!help) { 919 if (!help) {
924 module_put(helper->me); 920 module_put(helper->me);
925 return -ENOMEM; 921 return -ENOMEM;
926 } 922 }
927 923
928 rcu_assign_pointer(help->helper, helper); 924 rcu_assign_pointer(help->helper, helper);
929 info->helper = helper; 925 info->helper = helper;
930 return 0; 926 return 0;
931 } 927 }
932 928
933 #ifdef CONFIG_NF_NAT_NEEDED 929 #ifdef CONFIG_NF_NAT_NEEDED
934 static int parse_nat(const struct nlattr *attr, 930 static int parse_nat(const struct nlattr *attr,
935 struct ovs_conntrack_info *info, bool log) 931 struct ovs_conntrack_info *info, bool log)
936 { 932 {
937 struct nlattr *a; 933 struct nlattr *a;
938 int rem; 934 int rem;
939 bool have_ip_max = false; 935 bool have_ip_max = false;
940 bool have_proto_max = false; 936 bool have_proto_max = false;
941 bool ip_vers = (info->family == NFPROTO_IPV6); 937 bool ip_vers = (info->family == NFPROTO_IPV6);
942 938
943 nla_for_each_nested(a, attr, rem) { 939 nla_for_each_nested(a, attr, rem) {
944 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { 940 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
945 [OVS_NAT_ATTR_SRC] = {0, 0}, 941 [OVS_NAT_ATTR_SRC] = {0, 0},
946 [OVS_NAT_ATTR_DST] = {0, 0}, 942 [OVS_NAT_ATTR_DST] = {0, 0},
947 [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), 943 [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
948 sizeof(struct in6_addr)}, 944 sizeof(struct in6_addr)},
949 [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr), 945 [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
950 sizeof(struct in6_addr)}, 946 sizeof(struct in6_addr)},
951 [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, 947 [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
952 [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, 948 [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
953 [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, 949 [OVS_NAT_ATTR_PERSISTENT] = {0, 0},
954 [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, 950 [OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
955 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, 951 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
956 }; 952 };
957 int type = nla_type(a); 953 int type = nla_type(a);
958 954
959 if (type > OVS_NAT_ATTR_MAX) { 955 if (type > OVS_NAT_ATTR_MAX) {
960 OVS_NLERR(log, 956 OVS_NLERR(log,
961 "Unknown NAT attribute (type=%d, max=%d).\n", 957 "Unknown NAT attribute (type=%d, max=%d).\n",
962 type, OVS_NAT_ATTR_MAX); 958 type, OVS_NAT_ATTR_MAX);
963 return -EINVAL; 959 return -EINVAL;
964 } 960 }
965 961
966 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { 962 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
967 OVS_NLERR(log, 963 OVS_NLERR(log,
968 "NAT attribute type %d has unexpected length (%d != %d).\n", 964 "NAT attribute type %d has unexpected length (%d != %d).\n",
969 type, nla_len(a), 965 type, nla_len(a),
970 ovs_nat_attr_lens[type][ip_vers]); 966 ovs_nat_attr_lens[type][ip_vers]);
971 return -EINVAL; 967 return -EINVAL;
972 } 968 }
973 969
974 switch (type) { 970 switch (type) {
975 case OVS_NAT_ATTR_SRC: 971 case OVS_NAT_ATTR_SRC:
976 case OVS_NAT_ATTR_DST: 972 case OVS_NAT_ATTR_DST:
977 if (info->nat) { 973 if (info->nat) {
978 OVS_NLERR(log, 974 OVS_NLERR(log,
979 "Only one type of NAT may be specified.\n" 975 "Only one type of NAT may be specified.\n"
980 ); 976 );
981 return -ERANGE; 977 return -ERANGE;
982 } 978 }
983 info->nat |= OVS_CT_NAT; 979 info->nat |= OVS_CT_NAT;
984 info->nat |= ((type == OVS_NAT_ATTR_SRC) 980 info->nat |= ((type == OVS_NAT_ATTR_SRC)
985 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); 981 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
986 break; 982 break;
987 983
988 case OVS_NAT_ATTR_IP_MIN: 984 case OVS_NAT_ATTR_IP_MIN:
989 nla_memcpy(&info->range.min_addr, a, 985 nla_memcpy(&info->range.min_addr, a,
990 sizeof(info->range.min_addr)); 986 sizeof(info->range.min_addr));
991 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 987 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
992 break; 988 break;
993 989
994 case OVS_NAT_ATTR_IP_MAX: 990 case OVS_NAT_ATTR_IP_MAX:
995 have_ip_max = true; 991 have_ip_max = true;
996 nla_memcpy(&info->range.max_addr, a, 992 nla_memcpy(&info->range.max_addr, a,
997 sizeof(info->range.max_addr)); 993 sizeof(info->range.max_addr));
998 info->range.flags |= NF_NAT_RANGE_MAP_IPS; 994 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
999 break; 995 break;
1000 996
1001 case OVS_NAT_ATTR_PROTO_MIN: 997 case OVS_NAT_ATTR_PROTO_MIN:
1002 info->range.min_proto.all = htons(nla_get_u16(a)); 998 info->range.min_proto.all = htons(nla_get_u16(a));
1003 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 999 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1004 break; 1000 break;
1005 1001
1006 case OVS_NAT_ATTR_PROTO_MAX: 1002 case OVS_NAT_ATTR_PROTO_MAX:
1007 have_proto_max = true; 1003 have_proto_max = true;
1008 info->range.max_proto.all = htons(nla_get_u16(a)); 1004 info->range.max_proto.all = htons(nla_get_u16(a));
1009 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; 1005 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
1010 break; 1006 break;
1011 1007
1012 case OVS_NAT_ATTR_PERSISTENT: 1008 case OVS_NAT_ATTR_PERSISTENT:
1013 info->range.flags |= NF_NAT_RANGE_PERSISTENT; 1009 info->range.flags |= NF_NAT_RANGE_PERSISTENT;
1014 break; 1010 break;
1015 1011
1016 case OVS_NAT_ATTR_PROTO_HASH: 1012 case OVS_NAT_ATTR_PROTO_HASH:
1017 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; 1013 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
1018 break; 1014 break;
1019 1015
1020 case OVS_NAT_ATTR_PROTO_RANDOM: 1016 case OVS_NAT_ATTR_PROTO_RANDOM:
1021 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; 1017 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
1022 break; 1018 break;
1023 1019
1024 default: 1020 default:
1025 OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); 1021 OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
1026 return -EINVAL; 1022 return -EINVAL;
1027 } 1023 }
1028 } 1024 }
1029 1025
1030 if (rem > 0) { 1026 if (rem > 0) {
1031 OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); 1027 OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
1032 return -EINVAL; 1028 return -EINVAL;
1033 } 1029 }
1034 if (!info->nat) { 1030 if (!info->nat) {
1035 /* Do not allow flags if no type is given. */ 1031 /* Do not allow flags if no type is given. */
1036 if (info->range.flags) { 1032 if (info->range.flags) {
1037 OVS_NLERR(log, 1033 OVS_NLERR(log,
1038 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" 1034 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n"
1039 ); 1035 );
1040 return -EINVAL; 1036 return -EINVAL;
1041 } 1037 }
1042 info->nat = OVS_CT_NAT; /* NAT existing connections. */ 1038 info->nat = OVS_CT_NAT; /* NAT existing connections. */
1043 } else if (!info->commit) { 1039 } else if (!info->commit) {
1044 OVS_NLERR(log, 1040 OVS_NLERR(log,
1045 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" 1041 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n"
1046 ); 1042 );
1047 return -EINVAL; 1043 return -EINVAL;
1048 } 1044 }
1049 /* Allow missing IP_MAX. */ 1045 /* Allow missing IP_MAX. */
1050 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { 1046 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
1051 memcpy(&info->range.max_addr, &info->range.min_addr, 1047 memcpy(&info->range.max_addr, &info->range.min_addr,
1052 sizeof(info->range.max_addr)); 1048 sizeof(info->range.max_addr));
1053 } 1049 }
1054 /* Allow missing PROTO_MAX. */ 1050 /* Allow missing PROTO_MAX. */
1055 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1051 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1056 !have_proto_max) { 1052 !have_proto_max) {
1057 info->range.max_proto.all = info->range.min_proto.all; 1053 info->range.max_proto.all = info->range.min_proto.all;
1058 } 1054 }
1059 return 0; 1055 return 0;
1060 } 1056 }
1061 #endif 1057 #endif
1062 1058
1063 static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1059 static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
1064 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1060 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
1065 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 1061 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
1066 .maxlen = sizeof(u16) }, 1062 .maxlen = sizeof(u16) },
1067 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark), 1063 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
1068 .maxlen = sizeof(struct md_mark) }, 1064 .maxlen = sizeof(struct md_mark) },
1069 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 1065 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
1070 .maxlen = sizeof(struct md_labels) }, 1066 .maxlen = sizeof(struct md_labels) },
1071 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 1067 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
1072 .maxlen = NF_CT_HELPER_NAME_LEN }, 1068 .maxlen = NF_CT_HELPER_NAME_LEN },
1073 #ifdef CONFIG_NF_NAT_NEEDED 1069 #ifdef CONFIG_NF_NAT_NEEDED
1074 /* NAT length is checked when parsing the nested attributes. */ 1070 /* NAT length is checked when parsing the nested attributes. */
1075 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, 1071 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
1076 #endif 1072 #endif
1077 }; 1073 };
1078 1074
1079 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1075 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
1080 const char **helper, bool log) 1076 const char **helper, bool log)
1081 { 1077 {
1082 struct nlattr *a; 1078 struct nlattr *a;
1083 int rem; 1079 int rem;
1084 1080
1085 nla_for_each_nested(a, attr, rem) { 1081 nla_for_each_nested(a, attr, rem) {
1086 int type = nla_type(a); 1082 int type = nla_type(a);
1087 int maxlen = ovs_ct_attr_lens[type].maxlen; 1083 int maxlen = ovs_ct_attr_lens[type].maxlen;
1088 int minlen = ovs_ct_attr_lens[type].minlen; 1084 int minlen = ovs_ct_attr_lens[type].minlen;
1089 1085
1090 if (type > OVS_CT_ATTR_MAX) { 1086 if (type > OVS_CT_ATTR_MAX) {
1091 OVS_NLERR(log, 1087 OVS_NLERR(log,
1092 "Unknown conntrack attr (type=%d, max=%d)", 1088 "Unknown conntrack attr (type=%d, max=%d)",
1093 type, OVS_CT_ATTR_MAX); 1089 type, OVS_CT_ATTR_MAX);
1094 return -EINVAL; 1090 return -EINVAL;
1095 } 1091 }
1096 if (nla_len(a) < minlen || nla_len(a) > maxlen) { 1092 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
1097 OVS_NLERR(log, 1093 OVS_NLERR(log,
1098 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", 1094 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
1099 type, nla_len(a), maxlen); 1095 type, nla_len(a), maxlen);
1100 return -EINVAL; 1096 return -EINVAL;
1101 } 1097 }
1102 1098
1103 switch (type) { 1099 switch (type) {
1104 case OVS_CT_ATTR_COMMIT: 1100 case OVS_CT_ATTR_COMMIT:
1105 info->commit = true; 1101 info->commit = true;
1106 break; 1102 break;
1107 #ifdef CONFIG_NF_CONNTRACK_ZONES 1103 #ifdef CONFIG_NF_CONNTRACK_ZONES
1108 case OVS_CT_ATTR_ZONE: 1104 case OVS_CT_ATTR_ZONE:
1109 info->zone.id = nla_get_u16(a); 1105 info->zone.id = nla_get_u16(a);
1110 break; 1106 break;
1111 #endif 1107 #endif
1112 #ifdef CONFIG_NF_CONNTRACK_MARK 1108 #ifdef CONFIG_NF_CONNTRACK_MARK
1113 case OVS_CT_ATTR_MARK: { 1109 case OVS_CT_ATTR_MARK: {
1114 struct md_mark *mark = nla_data(a); 1110 struct md_mark *mark = nla_data(a);
1115 1111
1116 if (!mark->mask) { 1112 if (!mark->mask) {
1117 OVS_NLERR(log, "ct_mark mask cannot be 0"); 1113 OVS_NLERR(log, "ct_mark mask cannot be 0");
1118 return -EINVAL; 1114 return -EINVAL;
1119 } 1115 }
1120 info->mark = *mark; 1116 info->mark = *mark;
1121 break; 1117 break;
1122 } 1118 }
1123 #endif 1119 #endif
1124 #ifdef CONFIG_NF_CONNTRACK_LABELS 1120 #ifdef CONFIG_NF_CONNTRACK_LABELS
1125 case OVS_CT_ATTR_LABELS: { 1121 case OVS_CT_ATTR_LABELS: {
1126 struct md_labels *labels = nla_data(a); 1122 struct md_labels *labels = nla_data(a);
1127 1123
1128 if (!labels_nonzero(&labels->mask)) { 1124 if (!labels_nonzero(&labels->mask)) {
1129 OVS_NLERR(log, "ct_labels mask cannot be 0"); 1125 OVS_NLERR(log, "ct_labels mask cannot be 0");
1130 return -EINVAL; 1126 return -EINVAL;
1131 } 1127 }
1132 info->labels = *labels; 1128 info->labels = *labels;
1133 break; 1129 break;
1134 } 1130 }
1135 #endif 1131 #endif
1136 case OVS_CT_ATTR_HELPER: 1132 case OVS_CT_ATTR_HELPER:
1137 *helper = nla_data(a); 1133 *helper = nla_data(a);
1138 if (!memchr(*helper, '\0', nla_len(a))) { 1134 if (!memchr(*helper, '\0', nla_len(a))) {
1139 OVS_NLERR(log, "Invalid conntrack helper"); 1135 OVS_NLERR(log, "Invalid conntrack helper");
1140 return -EINVAL; 1136 return -EINVAL;
1141 } 1137 }
1142 break; 1138 break;
1143 #ifdef CONFIG_NF_NAT_NEEDED 1139 #ifdef CONFIG_NF_NAT_NEEDED
1144 case OVS_CT_ATTR_NAT: { 1140 case OVS_CT_ATTR_NAT: {
1145 int err = parse_nat(a, info, log); 1141 int err = parse_nat(a, info, log);
1146 1142
1147 if (err) 1143 if (err)
1148 return err; 1144 return err;
1149 break; 1145 break;
1150 } 1146 }
1151 #endif 1147 #endif
1152 default: 1148 default:
1153 OVS_NLERR(log, "Unknown conntrack attr (%d)", 1149 OVS_NLERR(log, "Unknown conntrack attr (%d)",
1154 type); 1150 type);
1155 return -EINVAL; 1151 return -EINVAL;
1156 } 1152 }
1157 } 1153 }
1158 1154
1159 #ifdef CONFIG_NF_CONNTRACK_MARK 1155 #ifdef CONFIG_NF_CONNTRACK_MARK
1160 if (!info->commit && info->mark.mask) { 1156 if (!info->commit && info->mark.mask) {
1161 OVS_NLERR(log, 1157 OVS_NLERR(log,
1162 "Setting conntrack mark requires 'commit' flag."); 1158 "Setting conntrack mark requires 'commit' flag.");
1163 return -EINVAL; 1159 return -EINVAL;
1164 } 1160 }
1165 #endif 1161 #endif
1166 #ifdef CONFIG_NF_CONNTRACK_LABELS 1162 #ifdef CONFIG_NF_CONNTRACK_LABELS
1167 if (!info->commit && labels_nonzero(&info->labels.mask)) { 1163 if (!info->commit && labels_nonzero(&info->labels.mask)) {
1168 OVS_NLERR(log, 1164 OVS_NLERR(log,
1169 "Setting conntrack labels requires 'commit' flag."); 1165 "Setting conntrack labels requires 'commit' flag.");
1170 return -EINVAL; 1166 return -EINVAL;
1171 } 1167 }
1172 #endif 1168 #endif
1173 if (rem > 0) { 1169 if (rem > 0) {
1174 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem); 1170 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
1175 return -EINVAL; 1171 return -EINVAL;
1176 } 1172 }
1177 1173
1178 return 0; 1174 return 0;
1179 } 1175 }
1180 1176
1181 bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr) 1177 bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
1182 { 1178 {
1183 if (attr == OVS_KEY_ATTR_CT_STATE) 1179 if (attr == OVS_KEY_ATTR_CT_STATE)
1184 return true; 1180 return true;
1185 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1181 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
1186 attr == OVS_KEY_ATTR_CT_ZONE) 1182 attr == OVS_KEY_ATTR_CT_ZONE)
1187 return true; 1183 return true;
1188 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && 1184 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
1189 attr == OVS_KEY_ATTR_CT_MARK) 1185 attr == OVS_KEY_ATTR_CT_MARK)
1190 return true; 1186 return true;
1191 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1187 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
1192 attr == OVS_KEY_ATTR_CT_LABELS) { 1188 attr == OVS_KEY_ATTR_CT_LABELS) {
1193 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1189 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1194 1190
1195 return ovs_net->xt_label; 1191 return ovs_net->xt_label;
1196 } 1192 }
1197 1193
1198 return false; 1194 return false;
1199 } 1195 }
1200 1196
1201 int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, 1197 int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
1202 const struct sw_flow_key *key, 1198 const struct sw_flow_key *key,
1203 struct sw_flow_actions **sfa, bool log) 1199 struct sw_flow_actions **sfa, bool log)
1204 { 1200 {
1205 struct ovs_conntrack_info ct_info; 1201 struct ovs_conntrack_info ct_info;
1206 const char *helper = NULL; 1202 const char *helper = NULL;
1207 u16 family; 1203 u16 family;
1208 int err; 1204 int err;
1209 1205
1210 family = key_to_nfproto(key); 1206 family = key_to_nfproto(key);
1211 if (family == NFPROTO_UNSPEC) { 1207 if (family == NFPROTO_UNSPEC) {
1212 OVS_NLERR(log, "ct family unspecified"); 1208 OVS_NLERR(log, "ct family unspecified");
1213 return -EINVAL; 1209 return -EINVAL;
1214 } 1210 }
1215 1211
1216 memset(&ct_info, 0, sizeof(ct_info)); 1212 memset(&ct_info, 0, sizeof(ct_info));
1217 ct_info.family = family; 1213 ct_info.family = family;
1218 1214
1219 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID, 1215 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
1220 NF_CT_DEFAULT_ZONE_DIR, 0); 1216 NF_CT_DEFAULT_ZONE_DIR, 0);
1221 1217
1222 err = parse_ct(attr, &ct_info, &helper, log); 1218 err = parse_ct(attr, &ct_info, &helper, log);
1223 if (err) 1219 if (err)
1224 return err; 1220 return err;
1225 1221
1226 /* Set up template for tracking connections in specific zones. */ 1222 /* Set up template for tracking connections in specific zones. */
1227 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL); 1223 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
1228 if (!ct_info.ct) { 1224 if (!ct_info.ct) {
1229 OVS_NLERR(log, "Failed to allocate conntrack template"); 1225 OVS_NLERR(log, "Failed to allocate conntrack template");
1230 return -ENOMEM; 1226 return -ENOMEM;
1231 } 1227 }
1232 1228
1233 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); 1229 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
1234 nf_conntrack_get(&ct_info.ct->ct_general); 1230 nf_conntrack_get(&ct_info.ct->ct_general);
1235 1231
1236 if (helper) { 1232 if (helper) {
1237 err = ovs_ct_add_helper(&ct_info, helper, key, log); 1233 err = ovs_ct_add_helper(&ct_info, helper, key, log);
1238 if (err) 1234 if (err)
1239 goto err_free_ct; 1235 goto err_free_ct;
1240 } 1236 }
1241 1237
1242 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info, 1238 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
1243 sizeof(ct_info), log); 1239 sizeof(ct_info), log);
1244 if (err) 1240 if (err)
1245 goto err_free_ct; 1241 goto err_free_ct;
1246 1242
1247 return 0; 1243 return 0;
1248 err_free_ct: 1244 err_free_ct:
1249 __ovs_ct_free_action(&ct_info); 1245 __ovs_ct_free_action(&ct_info);
1250 return err; 1246 return err;
1251 } 1247 }
1252 1248
1253 #ifdef CONFIG_NF_NAT_NEEDED 1249 #ifdef CONFIG_NF_NAT_NEEDED
1254 static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, 1250 static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
1255 struct sk_buff *skb) 1251 struct sk_buff *skb)
1256 { 1252 {
1257 struct nlattr *start; 1253 struct nlattr *start;
1258 1254
1259 start = nla_nest_start(skb, OVS_CT_ATTR_NAT); 1255 start = nla_nest_start(skb, OVS_CT_ATTR_NAT);
1260 if (!start) 1256 if (!start)
1261 return false; 1257 return false;
1262 1258
1263 if (info->nat & OVS_CT_SRC_NAT) { 1259 if (info->nat & OVS_CT_SRC_NAT) {
1264 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) 1260 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
1265 return false; 1261 return false;
1266 } else if (info->nat & OVS_CT_DST_NAT) { 1262 } else if (info->nat & OVS_CT_DST_NAT) {
1267 if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) 1263 if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
1268 return false; 1264 return false;
1269 } else { 1265 } else {
1270 goto out; 1266 goto out;
1271 } 1267 }
1272 1268
1273 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 1269 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
1274 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 1270 if (IS_ENABLED(CONFIG_NF_NAT_IPV4) &&
1275 info->family == NFPROTO_IPV4) { 1271 info->family == NFPROTO_IPV4) {
1276 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 1272 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
1277 info->range.min_addr.ip) || 1273 info->range.min_addr.ip) ||
1278 (info->range.max_addr.ip 1274 (info->range.max_addr.ip
1279 != info->range.min_addr.ip && 1275 != info->range.min_addr.ip &&
1280 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 1276 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
1281 info->range.max_addr.ip)))) 1277 info->range.max_addr.ip))))
1282 return false; 1278 return false;
1283 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 1279 } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) &&
1284 info->family == NFPROTO_IPV6) { 1280 info->family == NFPROTO_IPV6) {
1285 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 1281 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
1286 &info->range.min_addr.in6) || 1282 &info->range.min_addr.in6) ||
1287 (memcmp(&info->range.max_addr.in6, 1283 (memcmp(&info->range.max_addr.in6,
1288 &info->range.min_addr.in6, 1284 &info->range.min_addr.in6,
1289 sizeof(info->range.max_addr.in6)) && 1285 sizeof(info->range.max_addr.in6)) &&
1290 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 1286 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
1291 &info->range.max_addr.in6)))) 1287 &info->range.max_addr.in6))))
1292 return false; 1288 return false;
1293 } else { 1289 } else {
1294 return false; 1290 return false;
1295 } 1291 }
1296 } 1292 }
1297 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && 1293 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1298 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, 1294 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
1299 ntohs(info->range.min_proto.all)) || 1295 ntohs(info->range.min_proto.all)) ||
1300 (info->range.max_proto.all != info->range.min_proto.all && 1296 (info->range.max_proto.all != info->range.min_proto.all &&
1301 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, 1297 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
1302 ntohs(info->range.max_proto.all))))) 1298 ntohs(info->range.max_proto.all)))))
1303 return false; 1299 return false;
1304 1300
1305 if (info->range.flags & NF_NAT_RANGE_PERSISTENT && 1301 if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
1306 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) 1302 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
1307 return false; 1303 return false;
1308 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && 1304 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
1309 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) 1305 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
1310 return false; 1306 return false;
1311 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && 1307 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
1312 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) 1308 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
1313 return false; 1309 return false;
1314 out: 1310 out:
1315 nla_nest_end(skb, start); 1311 nla_nest_end(skb, start);
1316 1312
1317 return true; 1313 return true;
1318 } 1314 }
1319 #endif 1315 #endif
1320 1316
1321 int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 1317 int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
1322 struct sk_buff *skb) 1318 struct sk_buff *skb)
1323 { 1319 {
1324 struct nlattr *start; 1320 struct nlattr *start;
1325 1321
1326 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT); 1322 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
1327 if (!start) 1323 if (!start)
1328 return -EMSGSIZE; 1324 return -EMSGSIZE;
1329 1325
1330 if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT)) 1326 if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
1331 return -EMSGSIZE; 1327 return -EMSGSIZE;
1332 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && 1328 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
1333 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id)) 1329 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
1334 return -EMSGSIZE; 1330 return -EMSGSIZE;
1335 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask && 1331 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
1336 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark), 1332 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
1337 &ct_info->mark)) 1333 &ct_info->mark))
1338 return -EMSGSIZE; 1334 return -EMSGSIZE;
1339 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && 1335 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
1340 labels_nonzero(&ct_info->labels.mask) && 1336 labels_nonzero(&ct_info->labels.mask) &&
1341 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels), 1337 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
1342 &ct_info->labels)) 1338 &ct_info->labels))
1343 return -EMSGSIZE; 1339 return -EMSGSIZE;
1344 if (ct_info->helper) { 1340 if (ct_info->helper) {
1345 if (nla_put_string(skb, OVS_CT_ATTR_HELPER, 1341 if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
1346 ct_info->helper->name)) 1342 ct_info->helper->name))
1347 return -EMSGSIZE; 1343 return -EMSGSIZE;
1348 } 1344 }
1349 #ifdef CONFIG_NF_NAT_NEEDED 1345 #ifdef CONFIG_NF_NAT_NEEDED
1350 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) 1346 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
1351 return -EMSGSIZE; 1347 return -EMSGSIZE;
1352 #endif 1348 #endif
1353 nla_nest_end(skb, start); 1349 nla_nest_end(skb, start);
1354 1350
1355 return 0; 1351 return 0;
1356 } 1352 }
1357 1353
1358 void ovs_ct_free_action(const struct nlattr *a) 1354 void ovs_ct_free_action(const struct nlattr *a)
1359 { 1355 {
1360 struct ovs_conntrack_info *ct_info = nla_data(a); 1356 struct ovs_conntrack_info *ct_info = nla_data(a);
1361 1357
1362 __ovs_ct_free_action(ct_info); 1358 __ovs_ct_free_action(ct_info);
1363 } 1359 }
1364 1360
1365 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) 1361 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
1366 { 1362 {
1367 if (ct_info->helper) 1363 if (ct_info->helper)
1368 module_put(ct_info->helper->me); 1364 module_put(ct_info->helper->me);
1369 if (ct_info->ct) 1365 if (ct_info->ct)
1370 nf_ct_tmpl_free(ct_info->ct); 1366 nf_ct_tmpl_free(ct_info->ct);
1371 } 1367 }
1372 1368
1373 void ovs_ct_init(struct net *net) 1369 void ovs_ct_init(struct net *net)
1374 { 1370 {
1375 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE; 1371 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
1376 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1372 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1377 1373
1378 if (nf_connlabels_get(net, n_bits - 1)) { 1374 if (nf_connlabels_get(net, n_bits - 1)) {
1379 ovs_net->xt_label = false; 1375 ovs_net->xt_label = false;
1380 OVS_NLERR(true, "Failed to set connlabel length"); 1376 OVS_NLERR(true, "Failed to set connlabel length");
1381 } else { 1377 } else {
1382 ovs_net->xt_label = true; 1378 ovs_net->xt_label = true;
1383 } 1379 }
1384 } 1380 }
1385 1381
1386 void ovs_ct_exit(struct net *net) 1382 void ovs_ct_exit(struct net *net)
1387 { 1383 {
1388 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 1384 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1389 1385
1390 if (ovs_net->xt_label) 1386 if (ovs_net->xt_label)
1391 nf_connlabels_put(net); 1387 nf_connlabels_put(net);
1392 } 1388 }
1393 1389