Commit f3c5c1bfd430858d3a05436f82c51e53104feb6b
Committed by
Patrick McHardy
1 parent
e281b19897
Exists in
master
and in
7 other branches
netfilter: xtables: make ip_tables reentrant
Currently, the table traverser stores return addresses in the ruleset itself (struct ipt_entry/ip6t_entry->comefrom). This has a well-known drawback: the jumpstack is overwritten on reentry, making it necessary for targets to return absolute verdicts. Also, the ruleset (which might be heavy memory-wise) needs to be replicated for each CPU that can possibly invoke ipt_do_table/ip6t_do_table. This patch decouples the jumpstack from struct ipt_entry/ip6t_entry and instead puts it into struct xt_table_info. Not being restricted by 'comefrom' anymore, we can set up a stack as needed. By default, there is room allocated for two entries into the traverser. arp_tables is not touched though, because there are just one or two modules and further patches seek to collapse the table traverser anyhow. Signed-off-by: Jan Engelhardt <jengelh@medozas.de> Signed-off-by: Patrick McHardy <kaber@trash.net>
Showing 5 changed files with 145 additions and 66 deletions (side-by-side diff)
include/linux/netfilter/x_tables.h
... | ... | @@ -401,6 +401,13 @@ |
401 | 401 | unsigned int hook_entry[NF_INET_NUMHOOKS]; |
402 | 402 | unsigned int underflow[NF_INET_NUMHOOKS]; |
403 | 403 | |
404 | + /* | |
405 | + * Number of user chains. Since tables cannot have loops, at most | |
406 | + * @stacksize jumps (number of user chains) can possibly be made. | |
407 | + */ | |
408 | + unsigned int stacksize; | |
409 | + unsigned int *stackptr; | |
410 | + void ***jumpstack; | |
404 | 411 | /* ipt_entry tables: one per CPU */ |
405 | 412 | /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ |
406 | 413 | void *entries[1]; |
net/ipv4/netfilter/arp_tables.c
... | ... | @@ -649,6 +649,9 @@ |
649 | 649 | if (ret != 0) |
650 | 650 | break; |
651 | 651 | ++i; |
652 | + if (strcmp(arpt_get_target(iter)->u.user.name, | |
653 | + XT_ERROR_TARGET) == 0) | |
654 | + ++newinfo->stacksize; | |
652 | 655 | } |
653 | 656 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); |
654 | 657 | if (ret != 0) |
... | ... | @@ -1774,8 +1777,7 @@ |
1774 | 1777 | { |
1775 | 1778 | int ret; |
1776 | 1779 | struct xt_table_info *newinfo; |
1777 | - struct xt_table_info bootstrap | |
1778 | - = { 0, 0, 0, { 0 }, { 0 }, { } }; | |
1780 | + struct xt_table_info bootstrap = {0}; | |
1779 | 1781 | void *loc_cpu_entry; |
1780 | 1782 | struct xt_table *new_table; |
1781 | 1783 |
net/ipv4/netfilter/ip_tables.c
... | ... | @@ -321,8 +321,6 @@ |
321 | 321 | const struct net_device *out, |
322 | 322 | struct xt_table *table) |
323 | 323 | { |
324 | -#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom | |
325 | - | |
326 | 324 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
327 | 325 | const struct iphdr *ip; |
328 | 326 | bool hotdrop = false; |
... | ... | @@ -330,7 +328,8 @@ |
330 | 328 | unsigned int verdict = NF_DROP; |
331 | 329 | const char *indev, *outdev; |
332 | 330 | const void *table_base; |
333 | - struct ipt_entry *e, *back; | |
331 | + struct ipt_entry *e, **jumpstack; | |
332 | + unsigned int *stackptr, origptr, cpu; | |
334 | 333 | const struct xt_table_info *private; |
335 | 334 | struct xt_match_param mtpar; |
336 | 335 | struct xt_target_param tgpar; |
337 | 336 | |
338 | 337 | |
... | ... | @@ -356,19 +355,23 @@ |
356 | 355 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
357 | 356 | xt_info_rdlock_bh(); |
358 | 357 | private = table->private; |
359 | - table_base = private->entries[smp_processor_id()]; | |
358 | + cpu = smp_processor_id(); | |
359 | + table_base = private->entries[cpu]; | |
360 | + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; | |
361 | + stackptr = &private->stackptr[cpu]; | |
362 | + origptr = *stackptr; | |
360 | 363 | |
361 | 364 | e = get_entry(table_base, private->hook_entry[hook]); |
362 | 365 | |
363 | - /* For return from builtin chain */ | |
364 | - back = get_entry(table_base, private->underflow[hook]); | |
366 | + pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n", | |
367 | + table->name, hook, origptr, | |
368 | + get_entry(table_base, private->underflow[hook])); | |
365 | 369 | |
366 | 370 | do { |
367 | 371 | const struct ipt_entry_target *t; |
368 | 372 | const struct xt_entry_match *ematch; |
369 | 373 | |
370 | 374 | IP_NF_ASSERT(e); |
371 | - IP_NF_ASSERT(back); | |
372 | 375 | if (!ip_packet_match(ip, indev, outdev, |
373 | 376 | &e->ip, mtpar.fragoff)) { |
374 | 377 | no_match: |
375 | 378 | |
... | ... | @@ -403,17 +406,28 @@ |
403 | 406 | verdict = (unsigned)(-v) - 1; |
404 | 407 | break; |
405 | 408 | } |
406 | - e = back; | |
407 | - back = get_entry(table_base, back->comefrom); | |
409 | + if (*stackptr == 0) { | |
410 | + e = get_entry(table_base, | |
411 | + private->underflow[hook]); | |
412 | + pr_devel("Underflow (this is normal) " | |
413 | + "to %p\n", e); | |
414 | + } else { | |
415 | + e = jumpstack[--*stackptr]; | |
416 | + pr_devel("Pulled %p out from pos %u\n", | |
417 | + e, *stackptr); | |
418 | + e = ipt_next_entry(e); | |
419 | + } | |
408 | 420 | continue; |
409 | 421 | } |
410 | 422 | if (table_base + v != ipt_next_entry(e) && |
411 | 423 | !(e->ip.flags & IPT_F_GOTO)) { |
412 | - /* Save old back ptr in next entry */ | |
413 | - struct ipt_entry *next = ipt_next_entry(e); | |
414 | - next->comefrom = (void *)back - table_base; | |
415 | - /* set back pointer to next entry */ | |
416 | - back = next; | |
424 | + if (*stackptr >= private->stacksize) { | |
425 | + verdict = NF_DROP; | |
426 | + break; | |
427 | + } | |
428 | + jumpstack[(*stackptr)++] = e; | |
429 | + pr_devel("Pushed %p into pos %u\n", | |
430 | + e, *stackptr - 1); | |
417 | 431 | } |
418 | 432 | |
419 | 433 | e = get_entry(table_base, v); |
420 | 434 | |
... | ... | @@ -426,18 +440,7 @@ |
426 | 440 | tgpar.targinfo = t->data; |
427 | 441 | |
428 | 442 | |
429 | -#ifdef CONFIG_NETFILTER_DEBUG | |
430 | - tb_comefrom = 0xeeeeeeec; | |
431 | -#endif | |
432 | 443 | verdict = t->u.kernel.target->target(skb, &tgpar); |
433 | -#ifdef CONFIG_NETFILTER_DEBUG | |
434 | - if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { | |
435 | - printk("Target %s reentered!\n", | |
436 | - t->u.kernel.target->name); | |
437 | - verdict = NF_DROP; | |
438 | - } | |
439 | - tb_comefrom = 0x57acc001; | |
440 | -#endif | |
441 | 444 | /* Target might have changed stuff. */ |
442 | 445 | ip = ip_hdr(skb); |
443 | 446 | if (verdict == IPT_CONTINUE) |
... | ... | @@ -447,7 +450,9 @@ |
447 | 450 | break; |
448 | 451 | } while (!hotdrop); |
449 | 452 | xt_info_rdunlock_bh(); |
450 | - | |
453 | + pr_devel("Exiting %s; resetting sp from %u to %u\n", | |
454 | + __func__, *stackptr, origptr); | |
455 | + *stackptr = origptr; | |
451 | 456 | #ifdef DEBUG_ALLOW_ALL |
452 | 457 | return NF_ACCEPT; |
453 | 458 | #else |
... | ... | @@ -455,8 +460,6 @@ |
455 | 460 | return NF_DROP; |
456 | 461 | else return verdict; |
457 | 462 | #endif |
458 | - | |
459 | -#undef tb_comefrom | |
460 | 463 | } |
461 | 464 | |
462 | 465 | /* Figures out from what hook each rule can be called: returns 0 if |
... | ... | @@ -838,6 +841,9 @@ |
838 | 841 | if (ret != 0) |
839 | 842 | return ret; |
840 | 843 | ++i; |
844 | + if (strcmp(ipt_get_target(iter)->u.user.name, | |
845 | + XT_ERROR_TARGET) == 0) | |
846 | + ++newinfo->stacksize; | |
841 | 847 | } |
842 | 848 | |
843 | 849 | if (i != repl->num_entries) { |
... | ... | @@ -2086,8 +2092,7 @@ |
2086 | 2092 | { |
2087 | 2093 | int ret; |
2088 | 2094 | struct xt_table_info *newinfo; |
2089 | - struct xt_table_info bootstrap | |
2090 | - = { 0, 0, 0, { 0 }, { 0 }, { } }; | |
2095 | + struct xt_table_info bootstrap = {0}; | |
2091 | 2096 | void *loc_cpu_entry; |
2092 | 2097 | struct xt_table *new_table; |
2093 | 2098 |
net/ipv6/netfilter/ip6_tables.c
... | ... | @@ -351,15 +351,14 @@ |
351 | 351 | const struct net_device *out, |
352 | 352 | struct xt_table *table) |
353 | 353 | { |
354 | -#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom | |
355 | - | |
356 | 354 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
357 | 355 | bool hotdrop = false; |
358 | 356 | /* Initializing verdict to NF_DROP keeps gcc happy. */ |
359 | 357 | unsigned int verdict = NF_DROP; |
360 | 358 | const char *indev, *outdev; |
361 | 359 | const void *table_base; |
362 | - struct ip6t_entry *e, *back; | |
360 | + struct ip6t_entry *e, **jumpstack; | |
361 | + unsigned int *stackptr, origptr, cpu; | |
363 | 362 | const struct xt_table_info *private; |
364 | 363 | struct xt_match_param mtpar; |
365 | 364 | struct xt_target_param tgpar; |
366 | 365 | |
367 | 366 | |
... | ... | @@ -383,19 +382,19 @@ |
383 | 382 | |
384 | 383 | xt_info_rdlock_bh(); |
385 | 384 | private = table->private; |
386 | - table_base = private->entries[smp_processor_id()]; | |
385 | + cpu = smp_processor_id(); | |
386 | + table_base = private->entries[cpu]; | |
387 | + jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; | |
388 | + stackptr = &private->stackptr[cpu]; | |
389 | + origptr = *stackptr; | |
387 | 390 | |
388 | 391 | e = get_entry(table_base, private->hook_entry[hook]); |
389 | 392 | |
390 | - /* For return from builtin chain */ | |
391 | - back = get_entry(table_base, private->underflow[hook]); | |
392 | - | |
393 | 393 | do { |
394 | 394 | const struct ip6t_entry_target *t; |
395 | 395 | const struct xt_entry_match *ematch; |
396 | 396 | |
397 | 397 | IP_NF_ASSERT(e); |
398 | - IP_NF_ASSERT(back); | |
399 | 398 | if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, |
400 | 399 | &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { |
401 | 400 | no_match: |
402 | 401 | |
... | ... | @@ -432,17 +431,20 @@ |
432 | 431 | verdict = (unsigned)(-v) - 1; |
433 | 432 | break; |
434 | 433 | } |
435 | - e = back; | |
436 | - back = get_entry(table_base, back->comefrom); | |
434 | + if (*stackptr == 0) | |
435 | + e = get_entry(table_base, | |
436 | + private->underflow[hook]); | |
437 | + else | |
438 | + e = ip6t_next_entry(jumpstack[--*stackptr]); | |
437 | 439 | continue; |
438 | 440 | } |
439 | 441 | if (table_base + v != ip6t_next_entry(e) && |
440 | 442 | !(e->ipv6.flags & IP6T_F_GOTO)) { |
441 | - /* Save old back ptr in next entry */ | |
442 | - struct ip6t_entry *next = ip6t_next_entry(e); | |
443 | - next->comefrom = (void *)back - table_base; | |
444 | - /* set back pointer to next entry */ | |
445 | - back = next; | |
443 | + if (*stackptr >= private->stacksize) { | |
444 | + verdict = NF_DROP; | |
445 | + break; | |
446 | + } | |
447 | + jumpstack[(*stackptr)++] = e; | |
446 | 448 | } |
447 | 449 | |
448 | 450 | e = get_entry(table_base, v); |
449 | 451 | |
... | ... | @@ -454,19 +456,7 @@ |
454 | 456 | tgpar.target = t->u.kernel.target; |
455 | 457 | tgpar.targinfo = t->data; |
456 | 458 | |
457 | -#ifdef CONFIG_NETFILTER_DEBUG | |
458 | - tb_comefrom = 0xeeeeeeec; | |
459 | -#endif | |
460 | 459 | verdict = t->u.kernel.target->target(skb, &tgpar); |
461 | - | |
462 | -#ifdef CONFIG_NETFILTER_DEBUG | |
463 | - if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) { | |
464 | - printk("Target %s reentered!\n", | |
465 | - t->u.kernel.target->name); | |
466 | - verdict = NF_DROP; | |
467 | - } | |
468 | - tb_comefrom = 0x57acc001; | |
469 | -#endif | |
470 | 460 | if (verdict == IP6T_CONTINUE) |
471 | 461 | e = ip6t_next_entry(e); |
472 | 462 | else |
473 | 463 | |
... | ... | @@ -474,10 +464,8 @@ |
474 | 464 | break; |
475 | 465 | } while (!hotdrop); |
476 | 466 | |
477 | -#ifdef CONFIG_NETFILTER_DEBUG | |
478 | - tb_comefrom = NETFILTER_LINK_POISON; | |
479 | -#endif | |
480 | 467 | xt_info_rdunlock_bh(); |
468 | + *stackptr = origptr; | |
481 | 469 | |
482 | 470 | #ifdef DEBUG_ALLOW_ALL |
483 | 471 | return NF_ACCEPT; |
... | ... | @@ -486,8 +474,6 @@ |
486 | 474 | return NF_DROP; |
487 | 475 | else return verdict; |
488 | 476 | #endif |
489 | - | |
490 | -#undef tb_comefrom | |
491 | 477 | } |
492 | 478 | |
493 | 479 | /* Figures out from what hook each rule can be called: returns 0 if |
... | ... | @@ -869,6 +855,9 @@ |
869 | 855 | if (ret != 0) |
870 | 856 | return ret; |
871 | 857 | ++i; |
858 | + if (strcmp(ip6t_get_target(iter)->u.user.name, | |
859 | + XT_ERROR_TARGET) == 0) | |
860 | + ++newinfo->stacksize; | |
872 | 861 | } |
873 | 862 | |
874 | 863 | if (i != repl->num_entries) { |
... | ... | @@ -2120,8 +2109,7 @@ |
2120 | 2109 | { |
2121 | 2110 | int ret; |
2122 | 2111 | struct xt_table_info *newinfo; |
2123 | - struct xt_table_info bootstrap | |
2124 | - = { 0, 0, 0, { 0 }, { 0 }, { } }; | |
2112 | + struct xt_table_info bootstrap = {0}; | |
2125 | 2113 | void *loc_cpu_entry; |
2126 | 2114 | struct xt_table *new_table; |
2127 | 2115 |
net/netfilter/x_tables.c
... | ... | @@ -62,6 +62,9 @@ |
62 | 62 | [NFPROTO_IPV6] = "ip6", |
63 | 63 | }; |
64 | 64 | |
65 | +/* Allow this many total (re)entries. */ | |
66 | +static const unsigned int xt_jumpstack_multiplier = 2; | |
67 | + | |
65 | 68 | /* Registration hooks for targets. */ |
66 | 69 | int |
67 | 70 | xt_register_target(struct xt_target *target) |
... | ... | @@ -680,6 +683,26 @@ |
680 | 683 | else |
681 | 684 | vfree(info->entries[cpu]); |
682 | 685 | } |
686 | + | |
687 | + if (info->jumpstack != NULL) { | |
688 | + if (sizeof(void *) * info->stacksize > PAGE_SIZE) { | |
689 | + for_each_possible_cpu(cpu) | |
690 | + vfree(info->jumpstack[cpu]); | |
691 | + } else { | |
692 | + for_each_possible_cpu(cpu) | |
693 | + kfree(info->jumpstack[cpu]); | |
694 | + } | |
695 | + } | |
696 | + | |
697 | + if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE) | |
698 | + vfree(info->jumpstack); | |
699 | + else | |
700 | + kfree(info->jumpstack); | |
701 | + if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) | |
702 | + vfree(info->stackptr); | |
703 | + else | |
704 | + kfree(info->stackptr); | |
705 | + | |
683 | 706 | kfree(info); |
684 | 707 | } |
685 | 708 | EXPORT_SYMBOL(xt_free_table_info); |
686 | 709 | |
... | ... | @@ -724,7 +747,50 @@ |
724 | 747 | DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); |
725 | 748 | EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); |
726 | 749 | |
750 | +static int xt_jumpstack_alloc(struct xt_table_info *i) | |
751 | +{ | |
752 | + unsigned int size; | |
753 | + int cpu; | |
727 | 754 | |
755 | + size = sizeof(unsigned int) * nr_cpu_ids; | |
756 | + if (size > PAGE_SIZE) | |
757 | + i->stackptr = vmalloc(size); | |
758 | + else | |
759 | + i->stackptr = kmalloc(size, GFP_KERNEL); | |
760 | + if (i->stackptr == NULL) | |
761 | + return -ENOMEM; | |
762 | + memset(i->stackptr, 0, size); | |
763 | + | |
764 | + size = sizeof(void **) * nr_cpu_ids; | |
765 | + if (size > PAGE_SIZE) | |
766 | + i->jumpstack = vmalloc(size); | |
767 | + else | |
768 | + i->jumpstack = kmalloc(size, GFP_KERNEL); | |
769 | + if (i->jumpstack == NULL) | |
770 | + return -ENOMEM; | |
771 | + memset(i->jumpstack, 0, size); | |
772 | + | |
773 | + i->stacksize *= xt_jumpstack_multiplier; | |
774 | + size = sizeof(void *) * i->stacksize; | |
775 | + for_each_possible_cpu(cpu) { | |
776 | + if (size > PAGE_SIZE) | |
777 | + i->jumpstack[cpu] = vmalloc_node(size, | |
778 | + cpu_to_node(cpu)); | |
779 | + else | |
780 | + i->jumpstack[cpu] = kmalloc_node(size, | |
781 | + GFP_KERNEL, cpu_to_node(cpu)); | |
782 | + if (i->jumpstack[cpu] == NULL) | |
783 | + /* | |
784 | + * Freeing will be done later on by the callers. The | |
785 | + * chain is: xt_replace_table -> __do_replace -> | |
786 | + * do_replace -> xt_free_table_info. | |
787 | + */ | |
788 | + return -ENOMEM; | |
789 | + } | |
790 | + | |
791 | + return 0; | |
792 | +} | |
793 | + | |
728 | 794 | struct xt_table_info * |
729 | 795 | xt_replace_table(struct xt_table *table, |
730 | 796 | unsigned int num_counters, |
... | ... | @@ -732,6 +798,7 @@ |
732 | 798 | int *error) |
733 | 799 | { |
734 | 800 | struct xt_table_info *private; |
801 | + int ret; | |
735 | 802 | |
736 | 803 | /* Do the substitution. */ |
737 | 804 | local_bh_disable(); |
... | ... | @@ -746,6 +813,12 @@ |
746 | 813 | return NULL; |
747 | 814 | } |
748 | 815 | |
816 | + ret = xt_jumpstack_alloc(newinfo); | |
817 | + if (ret < 0) { | |
818 | + *error = ret; | |
819 | + return NULL; | |
820 | + } | |
821 | + | |
749 | 822 | table->private = newinfo; |
750 | 823 | newinfo->initial_entries = private->initial_entries; |
751 | 824 | |
... | ... | @@ -769,6 +842,10 @@ |
769 | 842 | int ret; |
770 | 843 | struct xt_table_info *private; |
771 | 844 | struct xt_table *t, *table; |
845 | + | |
846 | + ret = xt_jumpstack_alloc(newinfo); | |
847 | + if (ret < 0) | |
848 | + return ERR_PTR(ret); | |
772 | 849 | |
773 | 850 | /* Don't add one object to multiple lists. */ |
774 | 851 | table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); |