Commit f3c5c1bfd430858d3a05436f82c51e53104feb6b

Authored by Jan Engelhardt
Committed by Patrick McHardy
1 parent e281b19897

netfilter: xtables: make ip_tables reentrant

Currently, the table traversers store return addresses in the ruleset
itself (struct ipt_entry->comefrom / struct ip6t_entry->comefrom). This
has a well-known drawback: the jumpstack is overwritten on reentry,
making it necessary for targets to return absolute verdicts. Also, the
ruleset (which might be heavy memory-wise) needs to be replicated for
each CPU that can possibly invoke ipt_do_table or ip6t_do_table.

This patch decouples the jumpstack from the rule entry structures and
instead puts it into struct xt_table_info. Not being restricted by
'comefrom' anymore, we can set up a stack as needed. By default, there
is room allocated for two entries into the traverser.

arp_tables is not touched though, because there are just one or two
modules and further patches seek to collapse the table traverser
anyhow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>

Showing 5 changed files with 145 additions and 66 deletions (side-by-side diff)

include/linux/netfilter/x_tables.h
... ... @@ -401,6 +401,13 @@
401 401 unsigned int hook_entry[NF_INET_NUMHOOKS];
402 402 unsigned int underflow[NF_INET_NUMHOOKS];
403 403  
  404 + /*
  405 + * Number of user chains. Since tables cannot have loops, at most
  406 + * @stacksize jumps (number of user chains) can possibly be made.
  407 + */
  408 + unsigned int stacksize;
  409 + unsigned int *stackptr;
  410 + void ***jumpstack;
404 411 /* ipt_entry tables: one per CPU */
405 412 /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
406 413 void *entries[1];
net/ipv4/netfilter/arp_tables.c
... ... @@ -649,6 +649,9 @@
649 649 if (ret != 0)
650 650 break;
651 651 ++i;
  652 + if (strcmp(arpt_get_target(iter)->u.user.name,
  653 + XT_ERROR_TARGET) == 0)
  654 + ++newinfo->stacksize;
652 655 }
653 656 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
654 657 if (ret != 0)
... ... @@ -1774,8 +1777,7 @@
1774 1777 {
1775 1778 int ret;
1776 1779 struct xt_table_info *newinfo;
1777   - struct xt_table_info bootstrap
1778   - = { 0, 0, 0, { 0 }, { 0 }, { } };
  1780 + struct xt_table_info bootstrap = {0};
1779 1781 void *loc_cpu_entry;
1780 1782 struct xt_table *new_table;
1781 1783  
net/ipv4/netfilter/ip_tables.c
... ... @@ -321,8 +321,6 @@
321 321 const struct net_device *out,
322 322 struct xt_table *table)
323 323 {
324   -#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
325   -
326 324 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
327 325 const struct iphdr *ip;
328 326 bool hotdrop = false;
... ... @@ -330,7 +328,8 @@
330 328 unsigned int verdict = NF_DROP;
331 329 const char *indev, *outdev;
332 330 const void *table_base;
333   - struct ipt_entry *e, *back;
  331 + struct ipt_entry *e, **jumpstack;
  332 + unsigned int *stackptr, origptr, cpu;
334 333 const struct xt_table_info *private;
335 334 struct xt_match_param mtpar;
336 335 struct xt_target_param tgpar;
337 336  
338 337  
... ... @@ -356,19 +355,23 @@
356 355 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
357 356 xt_info_rdlock_bh();
358 357 private = table->private;
359   - table_base = private->entries[smp_processor_id()];
  358 + cpu = smp_processor_id();
  359 + table_base = private->entries[cpu];
  360 + jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
  361 + stackptr = &private->stackptr[cpu];
  362 + origptr = *stackptr;
360 363  
361 364 e = get_entry(table_base, private->hook_entry[hook]);
362 365  
363   - /* For return from builtin chain */
364   - back = get_entry(table_base, private->underflow[hook]);
  366 + pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
  367 + table->name, hook, origptr,
  368 + get_entry(table_base, private->underflow[hook]));
365 369  
366 370 do {
367 371 const struct ipt_entry_target *t;
368 372 const struct xt_entry_match *ematch;
369 373  
370 374 IP_NF_ASSERT(e);
371   - IP_NF_ASSERT(back);
372 375 if (!ip_packet_match(ip, indev, outdev,
373 376 &e->ip, mtpar.fragoff)) {
374 377 no_match:
375 378  
... ... @@ -403,17 +406,28 @@
403 406 verdict = (unsigned)(-v) - 1;
404 407 break;
405 408 }
406   - e = back;
407   - back = get_entry(table_base, back->comefrom);
  409 + if (*stackptr == 0) {
  410 + e = get_entry(table_base,
  411 + private->underflow[hook]);
  412 + pr_devel("Underflow (this is normal) "
  413 + "to %p\n", e);
  414 + } else {
  415 + e = jumpstack[--*stackptr];
  416 + pr_devel("Pulled %p out from pos %u\n",
  417 + e, *stackptr);
  418 + e = ipt_next_entry(e);
  419 + }
408 420 continue;
409 421 }
410 422 if (table_base + v != ipt_next_entry(e) &&
411 423 !(e->ip.flags & IPT_F_GOTO)) {
412   - /* Save old back ptr in next entry */
413   - struct ipt_entry *next = ipt_next_entry(e);
414   - next->comefrom = (void *)back - table_base;
415   - /* set back pointer to next entry */
416   - back = next;
  424 + if (*stackptr >= private->stacksize) {
  425 + verdict = NF_DROP;
  426 + break;
  427 + }
  428 + jumpstack[(*stackptr)++] = e;
  429 + pr_devel("Pushed %p into pos %u\n",
  430 + e, *stackptr - 1);
417 431 }
418 432  
419 433 e = get_entry(table_base, v);
420 434  
... ... @@ -426,18 +440,7 @@
426 440 tgpar.targinfo = t->data;
427 441  
428 442  
429   -#ifdef CONFIG_NETFILTER_DEBUG
430   - tb_comefrom = 0xeeeeeeec;
431   -#endif
432 443 verdict = t->u.kernel.target->target(skb, &tgpar);
433   -#ifdef CONFIG_NETFILTER_DEBUG
434   - if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
435   - printk("Target %s reentered!\n",
436   - t->u.kernel.target->name);
437   - verdict = NF_DROP;
438   - }
439   - tb_comefrom = 0x57acc001;
440   -#endif
441 444 /* Target might have changed stuff. */
442 445 ip = ip_hdr(skb);
443 446 if (verdict == IPT_CONTINUE)
... ... @@ -447,7 +450,9 @@
447 450 break;
448 451 } while (!hotdrop);
449 452 xt_info_rdunlock_bh();
450   -
  453 + pr_devel("Exiting %s; resetting sp from %u to %u\n",
  454 + __func__, *stackptr, origptr);
  455 + *stackptr = origptr;
451 456 #ifdef DEBUG_ALLOW_ALL
452 457 return NF_ACCEPT;
453 458 #else
... ... @@ -455,8 +460,6 @@
455 460 return NF_DROP;
456 461 else return verdict;
457 462 #endif
458   -
459   -#undef tb_comefrom
460 463 }
461 464  
462 465 /* Figures out from what hook each rule can be called: returns 0 if
... ... @@ -838,6 +841,9 @@
838 841 if (ret != 0)
839 842 return ret;
840 843 ++i;
  844 + if (strcmp(ipt_get_target(iter)->u.user.name,
  845 + XT_ERROR_TARGET) == 0)
  846 + ++newinfo->stacksize;
841 847 }
842 848  
843 849 if (i != repl->num_entries) {
... ... @@ -2086,8 +2092,7 @@
2086 2092 {
2087 2093 int ret;
2088 2094 struct xt_table_info *newinfo;
2089   - struct xt_table_info bootstrap
2090   - = { 0, 0, 0, { 0 }, { 0 }, { } };
  2095 + struct xt_table_info bootstrap = {0};
2091 2096 void *loc_cpu_entry;
2092 2097 struct xt_table *new_table;
2093 2098  
net/ipv6/netfilter/ip6_tables.c
... ... @@ -351,15 +351,14 @@
351 351 const struct net_device *out,
352 352 struct xt_table *table)
353 353 {
354   -#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
355   -
356 354 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
357 355 bool hotdrop = false;
358 356 /* Initializing verdict to NF_DROP keeps gcc happy. */
359 357 unsigned int verdict = NF_DROP;
360 358 const char *indev, *outdev;
361 359 const void *table_base;
362   - struct ip6t_entry *e, *back;
  360 + struct ip6t_entry *e, **jumpstack;
  361 + unsigned int *stackptr, origptr, cpu;
363 362 const struct xt_table_info *private;
364 363 struct xt_match_param mtpar;
365 364 struct xt_target_param tgpar;
366 365  
367 366  
... ... @@ -383,19 +382,19 @@
383 382  
384 383 xt_info_rdlock_bh();
385 384 private = table->private;
386   - table_base = private->entries[smp_processor_id()];
  385 + cpu = smp_processor_id();
  386 + table_base = private->entries[cpu];
  387 + jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
  388 + stackptr = &private->stackptr[cpu];
  389 + origptr = *stackptr;
387 390  
388 391 e = get_entry(table_base, private->hook_entry[hook]);
389 392  
390   - /* For return from builtin chain */
391   - back = get_entry(table_base, private->underflow[hook]);
392   -
393 393 do {
394 394 const struct ip6t_entry_target *t;
395 395 const struct xt_entry_match *ematch;
396 396  
397 397 IP_NF_ASSERT(e);
398   - IP_NF_ASSERT(back);
399 398 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
400 399 &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
401 400 no_match:
402 401  
... ... @@ -432,17 +431,20 @@
432 431 verdict = (unsigned)(-v) - 1;
433 432 break;
434 433 }
435   - e = back;
436   - back = get_entry(table_base, back->comefrom);
  434 + if (*stackptr == 0)
  435 + e = get_entry(table_base,
  436 + private->underflow[hook]);
  437 + else
  438 + e = ip6t_next_entry(jumpstack[--*stackptr]);
437 439 continue;
438 440 }
439 441 if (table_base + v != ip6t_next_entry(e) &&
440 442 !(e->ipv6.flags & IP6T_F_GOTO)) {
441   - /* Save old back ptr in next entry */
442   - struct ip6t_entry *next = ip6t_next_entry(e);
443   - next->comefrom = (void *)back - table_base;
444   - /* set back pointer to next entry */
445   - back = next;
  443 + if (*stackptr >= private->stacksize) {
  444 + verdict = NF_DROP;
  445 + break;
  446 + }
  447 + jumpstack[(*stackptr)++] = e;
446 448 }
447 449  
448 450 e = get_entry(table_base, v);
449 451  
... ... @@ -454,19 +456,7 @@
454 456 tgpar.target = t->u.kernel.target;
455 457 tgpar.targinfo = t->data;
456 458  
457   -#ifdef CONFIG_NETFILTER_DEBUG
458   - tb_comefrom = 0xeeeeeeec;
459   -#endif
460 459 verdict = t->u.kernel.target->target(skb, &tgpar);
461   -
462   -#ifdef CONFIG_NETFILTER_DEBUG
463   - if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
464   - printk("Target %s reentered!\n",
465   - t->u.kernel.target->name);
466   - verdict = NF_DROP;
467   - }
468   - tb_comefrom = 0x57acc001;
469   -#endif
470 460 if (verdict == IP6T_CONTINUE)
471 461 e = ip6t_next_entry(e);
472 462 else
473 463  
... ... @@ -474,10 +464,8 @@
474 464 break;
475 465 } while (!hotdrop);
476 466  
477   -#ifdef CONFIG_NETFILTER_DEBUG
478   - tb_comefrom = NETFILTER_LINK_POISON;
479   -#endif
480 467 xt_info_rdunlock_bh();
  468 + *stackptr = origptr;
481 469  
482 470 #ifdef DEBUG_ALLOW_ALL
483 471 return NF_ACCEPT;
... ... @@ -486,8 +474,6 @@
486 474 return NF_DROP;
487 475 else return verdict;
488 476 #endif
489   -
490   -#undef tb_comefrom
491 477 }
492 478  
493 479 /* Figures out from what hook each rule can be called: returns 0 if
... ... @@ -869,6 +855,9 @@
869 855 if (ret != 0)
870 856 return ret;
871 857 ++i;
  858 + if (strcmp(ip6t_get_target(iter)->u.user.name,
  859 + XT_ERROR_TARGET) == 0)
  860 + ++newinfo->stacksize;
872 861 }
873 862  
874 863 if (i != repl->num_entries) {
... ... @@ -2120,8 +2109,7 @@
2120 2109 {
2121 2110 int ret;
2122 2111 struct xt_table_info *newinfo;
2123   - struct xt_table_info bootstrap
2124   - = { 0, 0, 0, { 0 }, { 0 }, { } };
  2112 + struct xt_table_info bootstrap = {0};
2125 2113 void *loc_cpu_entry;
2126 2114 struct xt_table *new_table;
2127 2115  
net/netfilter/x_tables.c
... ... @@ -62,6 +62,9 @@
62 62 [NFPROTO_IPV6] = "ip6",
63 63 };
64 64  
  65 +/* Allow this many total (re)entries. */
  66 +static const unsigned int xt_jumpstack_multiplier = 2;
  67 +
65 68 /* Registration hooks for targets. */
66 69 int
67 70 xt_register_target(struct xt_target *target)
... ... @@ -680,6 +683,26 @@
680 683 else
681 684 vfree(info->entries[cpu]);
682 685 }
  686 +
  687 + if (info->jumpstack != NULL) {
  688 + if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
  689 + for_each_possible_cpu(cpu)
  690 + vfree(info->jumpstack[cpu]);
  691 + } else {
  692 + for_each_possible_cpu(cpu)
  693 + kfree(info->jumpstack[cpu]);
  694 + }
  695 + }
  696 +
  697 + if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
  698 + vfree(info->jumpstack);
  699 + else
  700 + kfree(info->jumpstack);
  701 + if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
  702 + vfree(info->stackptr);
  703 + else
  704 + kfree(info->stackptr);
  705 +
683 706 kfree(info);
684 707 }
685 708 EXPORT_SYMBOL(xt_free_table_info);
686 709  
... ... @@ -724,7 +747,50 @@
724 747 DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
725 748 EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
726 749  
  750 +static int xt_jumpstack_alloc(struct xt_table_info *i)
  751 +{
  752 + unsigned int size;
  753 + int cpu;
727 754  
  755 + size = sizeof(unsigned int) * nr_cpu_ids;
  756 + if (size > PAGE_SIZE)
  757 + i->stackptr = vmalloc(size);
  758 + else
  759 + i->stackptr = kmalloc(size, GFP_KERNEL);
  760 + if (i->stackptr == NULL)
  761 + return -ENOMEM;
  762 + memset(i->stackptr, 0, size);
  763 +
  764 + size = sizeof(void **) * nr_cpu_ids;
  765 + if (size > PAGE_SIZE)
  766 + i->jumpstack = vmalloc(size);
  767 + else
  768 + i->jumpstack = kmalloc(size, GFP_KERNEL);
  769 + if (i->jumpstack == NULL)
  770 + return -ENOMEM;
  771 + memset(i->jumpstack, 0, size);
  772 +
  773 + i->stacksize *= xt_jumpstack_multiplier;
  774 + size = sizeof(void *) * i->stacksize;
  775 + for_each_possible_cpu(cpu) {
  776 + if (size > PAGE_SIZE)
  777 + i->jumpstack[cpu] = vmalloc_node(size,
  778 + cpu_to_node(cpu));
  779 + else
  780 + i->jumpstack[cpu] = kmalloc_node(size,
  781 + GFP_KERNEL, cpu_to_node(cpu));
  782 + if (i->jumpstack[cpu] == NULL)
  783 + /*
  784 + * Freeing will be done later on by the callers. The
  785 + * chain is: xt_replace_table -> __do_replace ->
  786 + * do_replace -> xt_free_table_info.
  787 + */
  788 + return -ENOMEM;
  789 + }
  790 +
  791 + return 0;
  792 +}
  793 +
728 794 struct xt_table_info *
729 795 xt_replace_table(struct xt_table *table,
730 796 unsigned int num_counters,
... ... @@ -732,6 +798,7 @@
732 798 int *error)
733 799 {
734 800 struct xt_table_info *private;
  801 + int ret;
735 802  
736 803 /* Do the substitution. */
737 804 local_bh_disable();
... ... @@ -746,6 +813,12 @@
746 813 return NULL;
747 814 }
748 815  
  816 + ret = xt_jumpstack_alloc(newinfo);
  817 + if (ret < 0) {
  818 + *error = ret;
  819 + return NULL;
  820 + }
  821 +
749 822 table->private = newinfo;
750 823 newinfo->initial_entries = private->initial_entries;
751 824  
... ... @@ -769,6 +842,10 @@
769 842 int ret;
770 843 struct xt_table_info *private;
771 844 struct xt_table *t, *table;
  845 +
  846 + ret = xt_jumpstack_alloc(newinfo);
  847 + if (ret < 0)
  848 + return ERR_PTR(ret);
772 849  
773 850 /* Don't add one object to multiple lists. */
774 851 table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);