Commit ac196f8c92948deb0fc9ae617f3a453c6d71fa69
Exists in
smarc-l5.0.0_1.0.0-ga
and in
5 other branches
Merge branch 'master' of git://1984.lsi.us.es/nf
Pablo Neira Ayuso says: ==================== The following batch contains Netfilter fixes for 3.8-rc1. They are a mixture of old bugs that have passed unnoticed (I'll pass these to stable) and fresher ones from the previous merge window. They are: * Fix for MAC address in 6in4 tunnels via NFLOG that results in ulogd showing the wrong address, from Bob Hockney. * Fix a comment in nf_conntrack_ipv6, from Florent Fourcot. * Fix a leak in an error path in ctnetlink while creating an expectation, from Jesper Juhl. * Fix missing ICMP time exceeded in the IPv6 defragmentation code, from Haibo Xi. * Fix inconsistent handling of routing changes in MASQUERADE for the new connections case, from Andrew Collins. * Fix a missing skb_reset_transport_header in ip[6]t_REJECT that leads to crashes in the ixgbe driver (since it seems to access the transport header with TSO enabled), from Mukund Jampala. * Recover the obsoleted NOTRACK target by including it into the CT target and emit a warning via printk about it being obsoleted. Many people don't check the scheduled-for-removal file under Documentation, so we follow a less aggressive approach to kill this in a year or so. Spotted by Florian Westphal, patch from myself. * Fix race condition in xt_hashlimit that allows two or more entries to be created, from myself. * Fix crash if the CT target is used due to the recently added facilities to consult the dying and unconfirmed conntrack lists, from myself. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Showing 15 changed files Side-by-side Diff
- include/net/netns/conntrack.h
- include/net/netns/x_tables.h
- net/ipv4/netfilter/ipt_REJECT.c
- net/ipv4/netfilter/iptable_nat.c
- net/ipv6/netfilter/ip6t_REJECT.c
- net/ipv6/netfilter/ip6table_nat.c
- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
- net/ipv6/netfilter/nf_conntrack_reasm.c
- net/netfilter/Kconfig
- net/netfilter/nf_conntrack_core.c
- net/netfilter/nf_conntrack_netlink.c
- net/netfilter/nfnetlink_log.c
- net/netfilter/xt_CT.c
- net/netfilter/xt_hashlimit.c
- net/netfilter/xt_recent.c
include/net/netns/conntrack.h
... | ... | @@ -71,6 +71,7 @@ |
71 | 71 | struct hlist_head *expect_hash; |
72 | 72 | struct hlist_nulls_head unconfirmed; |
73 | 73 | struct hlist_nulls_head dying; |
74 | + struct hlist_nulls_head tmpl; | |
74 | 75 | struct ip_conntrack_stat __percpu *stat; |
75 | 76 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; |
76 | 77 | struct nf_exp_event_notifier __rcu *nf_expect_event_cb; |
include/net/netns/x_tables.h
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/iptable_nat.c
... | ... | @@ -124,23 +124,28 @@ |
124 | 124 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
125 | 125 | if (ret != NF_ACCEPT) |
126 | 126 | return ret; |
127 | - } else | |
127 | + } else { | |
128 | 128 | pr_debug("Already setup manip %s for ct %p\n", |
129 | 129 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", |
130 | 130 | ct); |
131 | + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) | |
132 | + goto oif_changed; | |
133 | + } | |
131 | 134 | break; |
132 | 135 | |
133 | 136 | default: |
134 | 137 | /* ESTABLISHED */ |
135 | 138 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || |
136 | 139 | ctinfo == IP_CT_ESTABLISHED_REPLY); |
137 | - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { | |
138 | - nf_ct_kill_acct(ct, ctinfo, skb); | |
139 | - return NF_DROP; | |
140 | - } | |
140 | + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) | |
141 | + goto oif_changed; | |
141 | 142 | } |
142 | 143 | |
143 | 144 | return nf_nat_packet(ct, ctinfo, hooknum, skb); |
145 | + | |
146 | +oif_changed: | |
147 | + nf_ct_kill_acct(ct, ctinfo, skb); | |
148 | + return NF_DROP; | |
144 | 149 | } |
145 | 150 | |
146 | 151 | static unsigned int |
net/ipv6/netfilter/ip6t_REJECT.c
... | ... | @@ -132,6 +132,7 @@ |
132 | 132 | ip6h->saddr = oip6h->daddr; |
133 | 133 | ip6h->daddr = oip6h->saddr; |
134 | 134 | |
135 | + skb_reset_transport_header(nskb); | |
135 | 136 | tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); |
136 | 137 | /* Truncate to length (no data) */ |
137 | 138 | tcph->doff = sizeof(struct tcphdr)/4; |
net/ipv6/netfilter/ip6table_nat.c
... | ... | @@ -127,23 +127,28 @@ |
127 | 127 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
128 | 128 | if (ret != NF_ACCEPT) |
129 | 129 | return ret; |
130 | - } else | |
130 | + } else { | |
131 | 131 | pr_debug("Already setup manip %s for ct %p\n", |
132 | 132 | maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", |
133 | 133 | ct); |
134 | + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) | |
135 | + goto oif_changed; | |
136 | + } | |
134 | 137 | break; |
135 | 138 | |
136 | 139 | default: |
137 | 140 | /* ESTABLISHED */ |
138 | 141 | NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || |
139 | 142 | ctinfo == IP_CT_ESTABLISHED_REPLY); |
140 | - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { | |
141 | - nf_ct_kill_acct(ct, ctinfo, skb); | |
142 | - return NF_DROP; | |
143 | - } | |
143 | + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) | |
144 | + goto oif_changed; | |
144 | 145 | } |
145 | 146 | |
146 | 147 | return nf_nat_packet(ct, ctinfo, hooknum, skb); |
148 | + | |
149 | +oif_changed: | |
150 | + nf_ct_kill_acct(ct, ctinfo, skb); | |
151 | + return NF_DROP; | |
147 | 152 | } |
148 | 153 | |
149 | 154 | static unsigned int |
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
... | ... | @@ -81,8 +81,8 @@ |
81 | 81 | } |
82 | 82 | protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); |
83 | 83 | /* |
84 | - * (protoff == skb->len) mean that the packet doesn't have no data | |
85 | - * except of IPv6 & ext headers. but it's tracked anyway. - YK | |
84 | + * (protoff == skb->len) means the packet has no data, just | |
85 | + * IPv6 and possibly extension headers, but it is tracked anyway | |
86 | 86 | */ |
87 | 87 | if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { |
88 | 88 | pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); |
net/ipv6/netfilter/nf_conntrack_reasm.c
... | ... | @@ -311,7 +311,10 @@ |
311 | 311 | else |
312 | 312 | fq->q.fragments = skb; |
313 | 313 | |
314 | - skb->dev = NULL; | |
314 | + if (skb->dev) { | |
315 | + fq->iif = skb->dev->ifindex; | |
316 | + skb->dev = NULL; | |
317 | + } | |
315 | 318 | fq->q.stamp = skb->tstamp; |
316 | 319 | fq->q.meat += skb->len; |
317 | 320 | if (payload_len > fq->q.max_size) |
net/netfilter/Kconfig
... | ... | @@ -680,6 +680,10 @@ |
680 | 680 | |
681 | 681 | To compile it as a module, choose M here. If unsure, say N. |
682 | 682 | |
683 | +config NETFILTER_XT_TARGET_NOTRACK | |
684 | + tristate '"NOTRACK" target support (DEPRECATED)' | |
685 | + select NETFILTER_XT_TARGET_CT | |
686 | + | |
683 | 687 | config NETFILTER_XT_TARGET_RATEEST |
684 | 688 | tristate '"RATEEST" target support' |
685 | 689 | depends on NETFILTER_ADVANCED |
net/netfilter/nf_conntrack_core.c
... | ... | @@ -1526,6 +1526,7 @@ |
1526 | 1526 | */ |
1527 | 1527 | #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) |
1528 | 1528 | #define DYING_NULLS_VAL ((1<<30)+1) |
1529 | +#define TEMPLATE_NULLS_VAL ((1<<30)+2) | |
1529 | 1530 | |
1530 | 1531 | static int nf_conntrack_init_net(struct net *net) |
1531 | 1532 | { |
... | ... | @@ -1534,6 +1535,7 @@ |
1534 | 1535 | atomic_set(&net->ct.count, 0); |
1535 | 1536 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); |
1536 | 1537 | INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); |
1538 | + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); | |
1537 | 1539 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); |
1538 | 1540 | if (!net->ct.stat) { |
1539 | 1541 | ret = -ENOMEM; |
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nfnetlink_log.c
... | ... | @@ -13,6 +13,7 @@ |
13 | 13 | */ |
14 | 14 | #include <linux/module.h> |
15 | 15 | #include <linux/skbuff.h> |
16 | +#include <linux/if_arp.h> | |
16 | 17 | #include <linux/init.h> |
17 | 18 | #include <linux/ip.h> |
18 | 19 | #include <linux/ipv6.h> |
... | ... | @@ -384,6 +385,7 @@ |
384 | 385 | struct nfgenmsg *nfmsg; |
385 | 386 | sk_buff_data_t old_tail = inst->skb->tail; |
386 | 387 | struct sock *sk; |
388 | + const unsigned char *hwhdrp; | |
387 | 389 | |
388 | 390 | nlh = nlmsg_put(inst->skb, 0, 0, |
389 | 391 | NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, |
... | ... | @@ -485,9 +487,17 @@ |
485 | 487 | if (indev && skb_mac_header_was_set(skb)) { |
486 | 488 | if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || |
487 | 489 | nla_put_be16(inst->skb, NFULA_HWLEN, |
488 | - htons(skb->dev->hard_header_len)) || | |
489 | - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, | |
490 | - skb_mac_header(skb))) | |
490 | + htons(skb->dev->hard_header_len))) | |
491 | + goto nla_put_failure; | |
492 | + | |
493 | + hwhdrp = skb_mac_header(skb); | |
494 | + | |
495 | + if (skb->dev->type == ARPHRD_SIT) | |
496 | + hwhdrp -= ETH_HLEN; | |
497 | + | |
498 | + if (hwhdrp >= skb->head && | |
499 | + nla_put(inst->skb, NFULA_HWHEADER, | |
500 | + skb->dev->hard_header_len, hwhdrp)) | |
491 | 501 | goto nla_put_failure; |
492 | 502 | } |
493 | 503 |
net/netfilter/xt_CT.c
... | ... | @@ -149,6 +149,10 @@ |
149 | 149 | |
150 | 150 | __set_bit(IPS_TEMPLATE_BIT, &ct->status); |
151 | 151 | __set_bit(IPS_CONFIRMED_BIT, &ct->status); |
152 | + | |
153 | + /* Overload tuple linked list to put us in template list. */ | |
154 | + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | |
155 | + &par->net->ct.tmpl); | |
152 | 156 | out: |
153 | 157 | info->ct = ct; |
154 | 158 | return 0; |
... | ... | @@ -289,6 +293,10 @@ |
289 | 293 | |
290 | 294 | __set_bit(IPS_TEMPLATE_BIT, &ct->status); |
291 | 295 | __set_bit(IPS_CONFIRMED_BIT, &ct->status); |
296 | + | |
297 | + /* Overload tuple linked list to put us in template list. */ | |
298 | + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | |
299 | + &par->net->ct.tmpl); | |
292 | 300 | out: |
293 | 301 | info->ct = ct; |
294 | 302 | return 0; |
295 | 303 | |
296 | 304 | |
... | ... | @@ -377,14 +385,60 @@ |
377 | 385 | }, |
378 | 386 | }; |
379 | 387 | |
388 | +static unsigned int | |
389 | +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) | |
390 | +{ | |
391 | + /* Previously seen (loopback)? Ignore. */ | |
392 | + if (skb->nfct != NULL) | |
393 | + return XT_CONTINUE; | |
394 | + | |
395 | + skb->nfct = &nf_ct_untracked_get()->ct_general; | |
396 | + skb->nfctinfo = IP_CT_NEW; | |
397 | + nf_conntrack_get(skb->nfct); | |
398 | + | |
399 | + return XT_CONTINUE; | |
400 | +} | |
401 | + | |
402 | +static int notrack_chk(const struct xt_tgchk_param *par) | |
403 | +{ | |
404 | + if (!par->net->xt.notrack_deprecated_warning) { | |
405 | + pr_info("netfilter: NOTRACK target is deprecated, " | |
406 | + "use CT instead or upgrade iptables\n"); | |
407 | + par->net->xt.notrack_deprecated_warning = true; | |
408 | + } | |
409 | + return 0; | |
410 | +} | |
411 | + | |
412 | +static struct xt_target notrack_tg_reg __read_mostly = { | |
413 | + .name = "NOTRACK", | |
414 | + .revision = 0, | |
415 | + .family = NFPROTO_UNSPEC, | |
416 | + .checkentry = notrack_chk, | |
417 | + .target = notrack_tg, | |
418 | + .table = "raw", | |
419 | + .me = THIS_MODULE, | |
420 | +}; | |
421 | + | |
380 | 422 | static int __init xt_ct_tg_init(void) |
381 | 423 | { |
382 | - return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); | |
424 | + int ret; | |
425 | + | |
426 | + ret = xt_register_target(&notrack_tg_reg); | |
427 | + if (ret < 0) | |
428 | + return ret; | |
429 | + | |
430 | + ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); | |
431 | + if (ret < 0) { | |
432 | + xt_unregister_target(&notrack_tg_reg); | |
433 | + return ret; | |
434 | + } | |
435 | + return 0; | |
383 | 436 | } |
384 | 437 | |
385 | 438 | static void __exit xt_ct_tg_exit(void) |
386 | 439 | { |
387 | 440 | xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); |
441 | + xt_unregister_target(&notrack_tg_reg); | |
388 | 442 | } |
389 | 443 | |
390 | 444 | module_init(xt_ct_tg_init); |
... | ... | @@ -394,4 +448,6 @@ |
394 | 448 | MODULE_DESCRIPTION("Xtables: connection tracking target"); |
395 | 449 | MODULE_ALIAS("ipt_CT"); |
396 | 450 | MODULE_ALIAS("ip6t_CT"); |
451 | +MODULE_ALIAS("ipt_NOTRACK"); | |
452 | +MODULE_ALIAS("ip6t_NOTRACK"); |
net/netfilter/xt_hashlimit.c
... | ... | @@ -157,11 +157,22 @@ |
157 | 157 | /* allocate dsthash_ent, initialize dst, put in htable and lock it */ |
158 | 158 | static struct dsthash_ent * |
159 | 159 | dsthash_alloc_init(struct xt_hashlimit_htable *ht, |
160 | - const struct dsthash_dst *dst) | |
160 | + const struct dsthash_dst *dst, bool *race) | |
161 | 161 | { |
162 | 162 | struct dsthash_ent *ent; |
163 | 163 | |
164 | 164 | spin_lock(&ht->lock); |
165 | + | |
166 | + /* Two or more packets may race to create the same entry in the | |
167 | + * hashtable, double check if this packet lost race. | |
168 | + */ | |
169 | + ent = dsthash_find(ht, dst); | |
170 | + if (ent != NULL) { | |
171 | + spin_unlock(&ht->lock); | |
172 | + *race = true; | |
173 | + return ent; | |
174 | + } | |
175 | + | |
165 | 176 | /* initialize hash with random val at the time we allocate |
166 | 177 | * the first hashtable entry */ |
167 | 178 | if (unlikely(!ht->rnd_initialized)) { |
... | ... | @@ -318,7 +329,10 @@ |
318 | 329 | parent = hashlimit_net->ipt_hashlimit; |
319 | 330 | else |
320 | 331 | parent = hashlimit_net->ip6t_hashlimit; |
321 | - remove_proc_entry(hinfo->pde->name, parent); | |
332 | + | |
333 | + if(parent != NULL) | |
334 | + remove_proc_entry(hinfo->pde->name, parent); | |
335 | + | |
322 | 336 | htable_selective_cleanup(hinfo, select_all); |
323 | 337 | vfree(hinfo); |
324 | 338 | } |
... | ... | @@ -585,6 +599,7 @@ |
585 | 599 | unsigned long now = jiffies; |
586 | 600 | struct dsthash_ent *dh; |
587 | 601 | struct dsthash_dst dst; |
602 | + bool race = false; | |
588 | 603 | u32 cost; |
589 | 604 | |
590 | 605 | if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) |
591 | 606 | |
592 | 607 | |
... | ... | @@ -593,13 +608,18 @@ |
593 | 608 | rcu_read_lock_bh(); |
594 | 609 | dh = dsthash_find(hinfo, &dst); |
595 | 610 | if (dh == NULL) { |
596 | - dh = dsthash_alloc_init(hinfo, &dst); | |
611 | + dh = dsthash_alloc_init(hinfo, &dst, &race); | |
597 | 612 | if (dh == NULL) { |
598 | 613 | rcu_read_unlock_bh(); |
599 | 614 | goto hotdrop; |
615 | + } else if (race) { | |
616 | + /* Already got an entry, update expiration timeout */ | |
617 | + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); | |
618 | + rateinfo_recalc(dh, now, hinfo->cfg.mode); | |
619 | + } else { | |
620 | + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); | |
621 | + rateinfo_init(dh, hinfo); | |
600 | 622 | } |
601 | - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); | |
602 | - rateinfo_init(dh, hinfo); | |
603 | 623 | } else { |
604 | 624 | /* update expiration timeout */ |
605 | 625 | dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); |
... | ... | @@ -856,6 +876,27 @@ |
856 | 876 | |
857 | 877 | static void __net_exit hashlimit_proc_net_exit(struct net *net) |
858 | 878 | { |
879 | + struct xt_hashlimit_htable *hinfo; | |
880 | + struct hlist_node *pos; | |
881 | + struct proc_dir_entry *pde; | |
882 | + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); | |
883 | + | |
884 | + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). Make | |
885 | + * sure that the parent ipt_hashlimit/ip6t_hashlimit proc entry is empty | |
886 | + * before trying to remove it. | |
887 | + */ | |
888 | + mutex_lock(&hashlimit_mutex); | |
889 | + pde = hashlimit_net->ipt_hashlimit; | |
890 | + if (pde == NULL) | |
891 | + pde = hashlimit_net->ip6t_hashlimit; | |
892 | + | |
893 | + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) | |
894 | + remove_proc_entry(hinfo->pde->name, pde); | |
895 | + | |
896 | + hashlimit_net->ipt_hashlimit = NULL; | |
897 | + hashlimit_net->ip6t_hashlimit = NULL; | |
898 | + mutex_unlock(&hashlimit_mutex); | |
899 | + | |
859 | 900 | proc_net_remove(net, "ipt_hashlimit"); |
860 | 901 | #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) |
861 | 902 | proc_net_remove(net, "ip6t_hashlimit"); |
... | ... | @@ -872,9 +913,6 @@ |
872 | 913 | |
873 | 914 | static void __net_exit hashlimit_net_exit(struct net *net) |
874 | 915 | { |
875 | - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); | |
876 | - | |
877 | - BUG_ON(!hlist_empty(&hashlimit_net->htables)); | |
878 | 916 | hashlimit_proc_net_exit(net); |
879 | 917 | } |
880 | 918 |
net/netfilter/xt_recent.c
... | ... | @@ -431,7 +431,8 @@ |
431 | 431 | list_del(&t->list); |
432 | 432 | spin_unlock_bh(&recent_lock); |
433 | 433 | #ifdef CONFIG_PROC_FS |
434 | - remove_proc_entry(t->name, recent_net->xt_recent); | |
434 | + if (recent_net->xt_recent != NULL) | |
435 | + remove_proc_entry(t->name, recent_net->xt_recent); | |
435 | 436 | #endif |
436 | 437 | recent_table_flush(t); |
437 | 438 | kfree(t); |
... | ... | @@ -615,6 +616,20 @@ |
615 | 616 | |
616 | 617 | static void __net_exit recent_proc_net_exit(struct net *net) |
617 | 618 | { |
619 | + struct recent_net *recent_net = recent_pernet(net); | |
620 | + struct recent_table *t; | |
621 | + | |
622 | + /* recent_net_exit() is called before recent_mt_destroy(). Make sure | |
623 | + * that the parent xt_recent proc entry is empty before trying to | |
624 | + * remove it. | |
625 | + */ | |
626 | + spin_lock_bh(&recent_lock); | |
627 | + list_for_each_entry(t, &recent_net->tables, list) | |
628 | + remove_proc_entry(t->name, recent_net->xt_recent); | |
629 | + | |
630 | + recent_net->xt_recent = NULL; | |
631 | + spin_unlock_bh(&recent_lock); | |
632 | + | |
618 | 633 | proc_net_remove(net, "xt_recent"); |
619 | 634 | } |
620 | 635 | #else |
... | ... | @@ -638,9 +653,6 @@ |
638 | 653 | |
639 | 654 | static void __net_exit recent_net_exit(struct net *net) |
640 | 655 | { |
641 | - struct recent_net *recent_net = recent_pernet(net); | |
642 | - | |
643 | - BUG_ON(!list_empty(&recent_net->tables)); | |
644 | 656 | recent_proc_net_exit(net); |
645 | 657 | } |
646 | 658 |