Commit ac196f8c92948deb0fc9ae617f3a453c6d71fa69

Authored by David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf

Pablo Neira Ayuso says:

====================
The following batch contains Netfilter fixes for 3.8-rc1. They are
a mixture of fixes for old bugs that went unnoticed (I'll pass these
to stable) and for fresher ones from the previous merge window:

* Fix for MAC address in 6in4 tunnels via NFLOG that results in ulogd
  showing the wrong address, from Bob Hockney.

* Fix a comment in nf_conntrack_ipv6, from Florent Fourcot.

* Fix a leak in an error path in ctnetlink while creating an expectation,
  from Jesper Juhl.

* Fix missing ICMP time exceeded in the IPv6 defragmentation code, from
  Haibo Xi.

* Fix inconsistent handling of routing changes in MASQUERADE for the
  new connections case, from Andrew Collins.

* Fix a missing skb_reset_transport_header() in ip[6]t_REJECT that leads
  to crashes in the ixgbe driver (since it seems to access the transport
  header with TSO enabled), from Mukund Jampala.

* Restore the obsoleted NOTRACK target by including it in the CT target
  module, and print a warning via printk that it is deprecated. Many people
  don't check the feature-removal schedule file under Documentation, so we
  follow a less aggressive approach and kill this in a year or so. Spotted
  by Florian Westphal, patch from myself.

* Fix race condition in xt_hashlimit that allows two or more packets to
  create duplicate entries, from myself.

* Fix a crash when the CT target is used, caused by the recently added
  facilities to consult the dying and unconfirmed conntrack lists, from myself.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

Showing 15 changed files Side-by-side Diff

include/net/netns/conntrack.h
... ... @@ -71,6 +71,7 @@
71 71 struct hlist_head *expect_hash;
72 72 struct hlist_nulls_head unconfirmed;
73 73 struct hlist_nulls_head dying;
  74 + struct hlist_nulls_head tmpl;
74 75 struct ip_conntrack_stat __percpu *stat;
75 76 struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
76 77 struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
include/net/netns/x_tables.h
... ... @@ -8,6 +8,7 @@
8 8  
9 9 struct netns_xt {
10 10 struct list_head tables[NFPROTO_NUMPROTO];
  11 + bool notrack_deprecated_warning;
11 12 #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
12 13 defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
13 14 struct ebt_table *broute_table;
net/ipv4/netfilter/ipt_REJECT.c
... ... @@ -81,6 +81,7 @@
81 81 niph->saddr = oiph->daddr;
82 82 niph->daddr = oiph->saddr;
83 83  
  84 + skb_reset_transport_header(nskb);
84 85 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
85 86 memset(tcph, 0, sizeof(*tcph));
86 87 tcph->source = oth->dest;
net/ipv4/netfilter/iptable_nat.c
... ... @@ -124,23 +124,28 @@
124 124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125 125 if (ret != NF_ACCEPT)
126 126 return ret;
127   - } else
  127 + } else {
128 128 pr_debug("Already setup manip %s for ct %p\n",
129 129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 130 ct);
  131 + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
  132 + goto oif_changed;
  133 + }
131 134 break;
132 135  
133 136 default:
134 137 /* ESTABLISHED */
135 138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
136 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
137   - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
138   - nf_ct_kill_acct(ct, ctinfo, skb);
139   - return NF_DROP;
140   - }
  140 + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
  141 + goto oif_changed;
141 142 }
142 143  
143 144 return nf_nat_packet(ct, ctinfo, hooknum, skb);
  145 +
  146 +oif_changed:
  147 + nf_ct_kill_acct(ct, ctinfo, skb);
  148 + return NF_DROP;
144 149 }
145 150  
146 151 static unsigned int
net/ipv6/netfilter/ip6t_REJECT.c
... ... @@ -132,6 +132,7 @@
132 132 ip6h->saddr = oip6h->daddr;
133 133 ip6h->daddr = oip6h->saddr;
134 134  
  135 + skb_reset_transport_header(nskb);
135 136 tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
136 137 /* Truncate to length (no data) */
137 138 tcph->doff = sizeof(struct tcphdr)/4;
net/ipv6/netfilter/ip6table_nat.c
... ... @@ -127,23 +127,28 @@
127 127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
128 128 if (ret != NF_ACCEPT)
129 129 return ret;
130   - } else
  130 + } else {
131 131 pr_debug("Already setup manip %s for ct %p\n",
132 132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 133 ct);
  134 + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
  135 + goto oif_changed;
  136 + }
134 137 break;
135 138  
136 139 default:
137 140 /* ESTABLISHED */
138 141 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 142 ctinfo == IP_CT_ESTABLISHED_REPLY);
140   - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
141   - nf_ct_kill_acct(ct, ctinfo, skb);
142   - return NF_DROP;
143   - }
  143 + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
  144 + goto oif_changed;
144 145 }
145 146  
146 147 return nf_nat_packet(ct, ctinfo, hooknum, skb);
  148 +
  149 +oif_changed:
  150 + nf_ct_kill_acct(ct, ctinfo, skb);
  151 + return NF_DROP;
147 152 }
148 153  
149 154 static unsigned int
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
... ... @@ -81,8 +81,8 @@
81 81 }
82 82 protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
83 83 /*
84   - * (protoff == skb->len) mean that the packet doesn't have no data
85   - * except of IPv6 & ext headers. but it's tracked anyway. - YK
  84 + * (protoff == skb->len) means the packet has not data, just
  85 + * IPv6 and possibly extensions headers, but it is tracked anyway
86 86 */
87 87 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
88 88 pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
net/ipv6/netfilter/nf_conntrack_reasm.c
... ... @@ -311,7 +311,10 @@
311 311 else
312 312 fq->q.fragments = skb;
313 313  
314   - skb->dev = NULL;
  314 + if (skb->dev) {
  315 + fq->iif = skb->dev->ifindex;
  316 + skb->dev = NULL;
  317 + }
315 318 fq->q.stamp = skb->tstamp;
316 319 fq->q.meat += skb->len;
317 320 if (payload_len > fq->q.max_size)
net/netfilter/Kconfig
... ... @@ -680,6 +680,10 @@
680 680  
681 681 To compile it as a module, choose M here. If unsure, say N.
682 682  
  683 +config NETFILTER_XT_TARGET_NOTRACK
  684 + tristate '"NOTRACK" target support (DEPRECATED)'
  685 + select NETFILTER_XT_TARGET_CT
  686 +
683 687 config NETFILTER_XT_TARGET_RATEEST
684 688 tristate '"RATEEST" target support'
685 689 depends on NETFILTER_ADVANCED
net/netfilter/nf_conntrack_core.c
... ... @@ -1526,6 +1526,7 @@
1526 1526 */
1527 1527 #define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
1528 1528 #define DYING_NULLS_VAL ((1<<30)+1)
  1529 +#define TEMPLATE_NULLS_VAL ((1<<30)+2)
1529 1530  
1530 1531 static int nf_conntrack_init_net(struct net *net)
1531 1532 {
... ... @@ -1534,6 +1535,7 @@
1534 1535 atomic_set(&net->ct.count, 0);
1535 1536 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
1536 1537 INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
  1538 + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
1537 1539 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1538 1540 if (!net->ct.stat) {
1539 1541 ret = -ENOMEM;
net/netfilter/nf_conntrack_netlink.c
... ... @@ -2624,7 +2624,7 @@
2624 2624 if (!help) {
2625 2625 if (!cda[CTA_EXPECT_TIMEOUT]) {
2626 2626 err = -EINVAL;
2627   - goto out;
  2627 + goto err_out;
2628 2628 }
2629 2629 exp->timeout.expires =
2630 2630 jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
net/netfilter/nfnetlink_log.c
... ... @@ -13,6 +13,7 @@
13 13 */
14 14 #include <linux/module.h>
15 15 #include <linux/skbuff.h>
  16 +#include <linux/if_arp.h>
16 17 #include <linux/init.h>
17 18 #include <linux/ip.h>
18 19 #include <linux/ipv6.h>
... ... @@ -384,6 +385,7 @@
384 385 struct nfgenmsg *nfmsg;
385 386 sk_buff_data_t old_tail = inst->skb->tail;
386 387 struct sock *sk;
  388 + const unsigned char *hwhdrp;
387 389  
388 390 nlh = nlmsg_put(inst->skb, 0, 0,
389 391 NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
... ... @@ -485,9 +487,17 @@
485 487 if (indev && skb_mac_header_was_set(skb)) {
486 488 if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
487 489 nla_put_be16(inst->skb, NFULA_HWLEN,
488   - htons(skb->dev->hard_header_len)) ||
489   - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
490   - skb_mac_header(skb)))
  490 + htons(skb->dev->hard_header_len)))
  491 + goto nla_put_failure;
  492 +
  493 + hwhdrp = skb_mac_header(skb);
  494 +
  495 + if (skb->dev->type == ARPHRD_SIT)
  496 + hwhdrp -= ETH_HLEN;
  497 +
  498 + if (hwhdrp >= skb->head &&
  499 + nla_put(inst->skb, NFULA_HWHEADER,
  500 + skb->dev->hard_header_len, hwhdrp))
491 501 goto nla_put_failure;
492 502 }
493 503  
net/netfilter/xt_CT.c
... ... @@ -149,6 +149,10 @@
149 149  
150 150 __set_bit(IPS_TEMPLATE_BIT, &ct->status);
151 151 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
  152 +
  153 + /* Overload tuple linked list to put us in template list. */
  154 + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
  155 + &par->net->ct.tmpl);
152 156 out:
153 157 info->ct = ct;
154 158 return 0;
... ... @@ -289,6 +293,10 @@
289 293  
290 294 __set_bit(IPS_TEMPLATE_BIT, &ct->status);
291 295 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
  296 +
  297 + /* Overload tuple linked list to put us in template list. */
  298 + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
  299 + &par->net->ct.tmpl);
292 300 out:
293 301 info->ct = ct;
294 302 return 0;
295 303  
296 304  
... ... @@ -377,14 +385,60 @@
377 385 },
378 386 };
379 387  
  388 +static unsigned int
  389 +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
  390 +{
  391 + /* Previously seen (loopback)? Ignore. */
  392 + if (skb->nfct != NULL)
  393 + return XT_CONTINUE;
  394 +
  395 + skb->nfct = &nf_ct_untracked_get()->ct_general;
  396 + skb->nfctinfo = IP_CT_NEW;
  397 + nf_conntrack_get(skb->nfct);
  398 +
  399 + return XT_CONTINUE;
  400 +}
  401 +
  402 +static int notrack_chk(const struct xt_tgchk_param *par)
  403 +{
  404 + if (!par->net->xt.notrack_deprecated_warning) {
  405 + pr_info("netfilter: NOTRACK target is deprecated, "
  406 + "use CT instead or upgrade iptables\n");
  407 + par->net->xt.notrack_deprecated_warning = true;
  408 + }
  409 + return 0;
  410 +}
  411 +
  412 +static struct xt_target notrack_tg_reg __read_mostly = {
  413 + .name = "NOTRACK",
  414 + .revision = 0,
  415 + .family = NFPROTO_UNSPEC,
  416 + .checkentry = notrack_chk,
  417 + .target = notrack_tg,
  418 + .table = "raw",
  419 + .me = THIS_MODULE,
  420 +};
  421 +
380 422 static int __init xt_ct_tg_init(void)
381 423 {
382   - return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
  424 + int ret;
  425 +
  426 + ret = xt_register_target(&notrack_tg_reg);
  427 + if (ret < 0)
  428 + return ret;
  429 +
  430 + ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
  431 + if (ret < 0) {
  432 + xt_unregister_target(&notrack_tg_reg);
  433 + return ret;
  434 + }
  435 + return 0;
383 436 }
384 437  
385 438 static void __exit xt_ct_tg_exit(void)
386 439 {
387 440 xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
  441 + xt_unregister_target(&notrack_tg_reg);
388 442 }
389 443  
390 444 module_init(xt_ct_tg_init);
... ... @@ -394,4 +448,6 @@
394 448 MODULE_DESCRIPTION("Xtables: connection tracking target");
395 449 MODULE_ALIAS("ipt_CT");
396 450 MODULE_ALIAS("ip6t_CT");
  451 +MODULE_ALIAS("ipt_NOTRACK");
  452 +MODULE_ALIAS("ip6t_NOTRACK");
net/netfilter/xt_hashlimit.c
... ... @@ -157,11 +157,22 @@
157 157 /* allocate dsthash_ent, initialize dst, put in htable and lock it */
158 158 static struct dsthash_ent *
159 159 dsthash_alloc_init(struct xt_hashlimit_htable *ht,
160   - const struct dsthash_dst *dst)
  160 + const struct dsthash_dst *dst, bool *race)
161 161 {
162 162 struct dsthash_ent *ent;
163 163  
164 164 spin_lock(&ht->lock);
  165 +
  166 + /* Two or more packets may race to create the same entry in the
  167 + * hashtable, double check if this packet lost race.
  168 + */
  169 + ent = dsthash_find(ht, dst);
  170 + if (ent != NULL) {
  171 + spin_unlock(&ht->lock);
  172 + *race = true;
  173 + return ent;
  174 + }
  175 +
165 176 /* initialize hash with random val at the time we allocate
166 177 * the first hashtable entry */
167 178 if (unlikely(!ht->rnd_initialized)) {
... ... @@ -318,7 +329,10 @@
318 329 parent = hashlimit_net->ipt_hashlimit;
319 330 else
320 331 parent = hashlimit_net->ip6t_hashlimit;
321   - remove_proc_entry(hinfo->pde->name, parent);
  332 +
  333 + if(parent != NULL)
  334 + remove_proc_entry(hinfo->pde->name, parent);
  335 +
322 336 htable_selective_cleanup(hinfo, select_all);
323 337 vfree(hinfo);
324 338 }
... ... @@ -585,6 +599,7 @@
585 599 unsigned long now = jiffies;
586 600 struct dsthash_ent *dh;
587 601 struct dsthash_dst dst;
  602 + bool race = false;
588 603 u32 cost;
589 604  
590 605 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
591 606  
592 607  
... ... @@ -593,13 +608,18 @@
593 608 rcu_read_lock_bh();
594 609 dh = dsthash_find(hinfo, &dst);
595 610 if (dh == NULL) {
596   - dh = dsthash_alloc_init(hinfo, &dst);
  611 + dh = dsthash_alloc_init(hinfo, &dst, &race);
597 612 if (dh == NULL) {
598 613 rcu_read_unlock_bh();
599 614 goto hotdrop;
  615 + } else if (race) {
  616 + /* Already got an entry, update expiration timeout */
  617 + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
  618 + rateinfo_recalc(dh, now, hinfo->cfg.mode);
  619 + } else {
  620 + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
  621 + rateinfo_init(dh, hinfo);
600 622 }
601   - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
602   - rateinfo_init(dh, hinfo);
603 623 } else {
604 624 /* update expiration timeout */
605 625 dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
... ... @@ -856,6 +876,27 @@
856 876  
857 877 static void __net_exit hashlimit_proc_net_exit(struct net *net)
858 878 {
  879 + struct xt_hashlimit_htable *hinfo;
  880 + struct hlist_node *pos;
  881 + struct proc_dir_entry *pde;
  882 + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
  883 +
  884 + /* recent_net_exit() is called before recent_mt_destroy(). Make sure
  885 + * that the parent xt_recent proc entry is is empty before trying to
  886 + * remove it.
  887 + */
  888 + mutex_lock(&hashlimit_mutex);
  889 + pde = hashlimit_net->ipt_hashlimit;
  890 + if (pde == NULL)
  891 + pde = hashlimit_net->ip6t_hashlimit;
  892 +
  893 + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node)
  894 + remove_proc_entry(hinfo->pde->name, pde);
  895 +
  896 + hashlimit_net->ipt_hashlimit = NULL;
  897 + hashlimit_net->ip6t_hashlimit = NULL;
  898 + mutex_unlock(&hashlimit_mutex);
  899 +
859 900 proc_net_remove(net, "ipt_hashlimit");
860 901 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
861 902 proc_net_remove(net, "ip6t_hashlimit");
... ... @@ -872,9 +913,6 @@
872 913  
873 914 static void __net_exit hashlimit_net_exit(struct net *net)
874 915 {
875   - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
876   -
877   - BUG_ON(!hlist_empty(&hashlimit_net->htables));
878 916 hashlimit_proc_net_exit(net);
879 917 }
880 918  
net/netfilter/xt_recent.c
... ... @@ -431,7 +431,8 @@
431 431 list_del(&t->list);
432 432 spin_unlock_bh(&recent_lock);
433 433 #ifdef CONFIG_PROC_FS
434   - remove_proc_entry(t->name, recent_net->xt_recent);
  434 + if (recent_net->xt_recent != NULL)
  435 + remove_proc_entry(t->name, recent_net->xt_recent);
435 436 #endif
436 437 recent_table_flush(t);
437 438 kfree(t);
... ... @@ -615,6 +616,20 @@
615 616  
616 617 static void __net_exit recent_proc_net_exit(struct net *net)
617 618 {
  619 + struct recent_net *recent_net = recent_pernet(net);
  620 + struct recent_table *t;
  621 +
  622 + /* recent_net_exit() is called before recent_mt_destroy(). Make sure
  623 + * that the parent xt_recent proc entry is is empty before trying to
  624 + * remove it.
  625 + */
  626 + spin_lock_bh(&recent_lock);
  627 + list_for_each_entry(t, &recent_net->tables, list)
  628 + remove_proc_entry(t->name, recent_net->xt_recent);
  629 +
  630 + recent_net->xt_recent = NULL;
  631 + spin_unlock_bh(&recent_lock);
  632 +
618 633 proc_net_remove(net, "xt_recent");
619 634 }
620 635 #else
... ... @@ -638,9 +653,6 @@
638 653  
639 654 static void __net_exit recent_net_exit(struct net *net)
640 655 {
641   - struct recent_net *recent_net = recent_pernet(net);
642   -
643   - BUG_ON(!list_empty(&recent_net->tables));
644 656 recent_proc_net_exit(net);
645 657 }
646 658