Commit 9e3961a0979817c612b10b2da4f3045ec9faa779
Committed by
Linus Torvalds
1 parent
f938612dd9
Exists in
ti-lsk-linux-4.1.y
and in
10 other branches
kernel: add panic_on_warn
There have been several times where I have had to rebuild a kernel to cause a panic when hitting a WARN() in the code in order to get a crash dump from a system. Sometimes this is easy to do; other times (such as in the case of a remote admin) it is not trivial to send new images to the user. A much easier method would be a switch to change the WARN() over to a panic. This makes debugging easier in that I can now test the actual image the WARN() was seen on and I do not have to engage in remote debugging. This patch adds a panic_on_warn kernel parameter and a /proc/sys/kernel/panic_on_warn sysctl that call panic() in the warn_slowpath_common() path. The function will still print out the location of the warning. An example of the panic_on_warn output: The first line below is from the WARN_ON() to output the WARN_ON()'s location. After that the panic() output is displayed. WARNING: CPU: 30 PID: 11698 at /home/prarit/dummy_module/dummy-module.c:25 init_dummy+0x1f/0x30 [dummy_module]() Kernel panic - not syncing: panic_on_warn set ... CPU: 30 PID: 11698 Comm: insmod Tainted: G W OE 3.17.0+ #57 Hardware name: Intel Corporation S2600CP/S2600CP, BIOS RMLSDP.86I.00.29.D696.1311111329 11/11/2013 0000000000000000 000000008e3f87df ffff88080f093c38 ffffffff81665190 0000000000000000 ffffffff818aea3d ffff88080f093cb8 ffffffff8165e2ec ffffffff00000008 ffff88080f093cc8 ffff88080f093c68 000000008e3f87df Call Trace: [<ffffffff81665190>] dump_stack+0x46/0x58 [<ffffffff8165e2ec>] panic+0xd0/0x204 [<ffffffffa038e05f>] ? init_dummy+0x1f/0x30 [dummy_module] [<ffffffff81076b90>] warn_slowpath_common+0xd0/0xd0 [<ffffffffa038e040>] ? dummy_greetings+0x40/0x40 [dummy_module] [<ffffffff81076c8a>] warn_slowpath_null+0x1a/0x20 [<ffffffffa038e05f>] init_dummy+0x1f/0x30 [dummy_module] [<ffffffff81002144>] do_one_initcall+0xd4/0x210 [<ffffffff811b52c2>] ? __vunmap+0xc2/0x110 [<ffffffff810f8889>] load_module+0x16a9/0x1b30 [<ffffffff810f3d30>] ? store_uevent+0x70/0x70 [<ffffffff810f49b9>] ? 
copy_module_from_fd.isra.44+0x129/0x180 [<ffffffff810f8ec6>] SyS_finit_module+0xa6/0xd0 [<ffffffff8166cf29>] system_call_fastpath+0x12/0x17 Successfully tested by me. hpa said: There is another very valid use for this: many operators would rather a machine shuts down than being potentially compromised either functionally or security-wise. Signed-off-by: Prarit Bhargava <prarit@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Acked-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Fabian Frederick <fabf@skynet.be> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Showing 8 changed files with 61 additions and 14 deletions Side-by-side Diff
Documentation/kdump/kdump.txt
... | ... | @@ -471,6 +471,13 @@ |
471 | 471 | |
472 | 472 | http://people.redhat.com/~anderson/ |
473 | 473 | |
474 | +Trigger Kdump on WARN() | |
475 | +======================= | |
476 | + | |
477 | +The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This | |
478 | +will cause a kdump to occur at the panic() call. In cases where a user wants | |
479 | +to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1 | |
480 | +to achieve the same behaviour. | |
474 | 481 | |
475 | 482 | Contact |
476 | 483 | ======= |
Documentation/kernel-parameters.txt
... | ... | @@ -2509,6 +2509,9 @@ |
2509 | 2509 | timeout < 0: reboot immediately |
2510 | 2510 | Format: <timeout> |
2511 | 2511 | |
2512 | + panic_on_warn panic() instead of WARN(). Useful to cause kdump | |
2513 | + on a WARN(). | |
2514 | + | |
2512 | 2515 | crash_kexec_post_notifiers |
2513 | 2516 | Run kdump after running panic-notifiers and dumping |
2514 | 2517 | kmsg. This only for the users who doubt kdump always |
Documentation/sysctl/kernel.txt
... | ... | @@ -54,8 +54,9 @@ |
54 | 54 | - overflowuid |
55 | 55 | - panic |
56 | 56 | - panic_on_oops |
57 | -- panic_on_unrecovered_nmi | |
58 | 57 | - panic_on_stackoverflow |
58 | +- panic_on_unrecovered_nmi | |
59 | +- panic_on_warn | |
59 | 60 | - pid_max |
60 | 61 | - powersave-nap [ PPC only ] |
61 | 62 | - printk |
... | ... | @@ -527,19 +528,6 @@ |
527 | 528 | |
528 | 529 | ============================================================== |
529 | 530 | |
530 | -panic_on_unrecovered_nmi: | |
531 | - | |
532 | -The default Linux behaviour on an NMI of either memory or unknown is | |
533 | -to continue operation. For many environments such as scientific | |
534 | -computing it is preferable that the box is taken out and the error | |
535 | -dealt with than an uncorrected parity/ECC error get propagated. | |
536 | - | |
537 | -A small number of systems do generate NMI's for bizarre random reasons | |
538 | -such as power management so the default is off. That sysctl works like | |
539 | -the existing panic controls already in that directory. | |
540 | - | |
541 | -============================================================== | |
542 | - | |
543 | 531 | panic_on_oops: |
544 | 532 | |
545 | 533 | Controls the kernel's behaviour when an oops or BUG is encountered. |
... | ... | @@ -560,6 +548,30 @@ |
560 | 548 | 0: try to continue operation. |
561 | 549 | |
562 | 550 | 1: panic immediately. |
551 | + | |
552 | +============================================================== | |
553 | + | |
554 | +panic_on_unrecovered_nmi: | |
555 | + | |
556 | +The default Linux behaviour on an NMI of either memory or unknown is | |
557 | +to continue operation. For many environments such as scientific | |
558 | +computing it is preferable that the box is taken out and the error | |
559 | +dealt with than an uncorrected parity/ECC error get propagated. | |
560 | + | |
561 | +A small number of systems do generate NMI's for bizarre random reasons | |
562 | +such as power management so the default is off. That sysctl works like | |
563 | +the existing panic controls already in that directory. | |
564 | + | |
565 | +============================================================== | |
566 | + | |
567 | +panic_on_warn: | |
568 | + | |
569 | +Calls panic() in the WARN() path when set to 1. This is useful to avoid | |
570 | +a kernel rebuild when attempting to kdump at the location of a WARN(). | |
571 | + | |
572 | +0: only WARN(), default behaviour. | |
573 | + | |
574 | +1: call panic() after printing out WARN() location. | |
563 | 575 | |
564 | 576 | ============================================================== |
565 | 577 |
include/linux/kernel.h
... | ... | @@ -427,6 +427,7 @@ |
427 | 427 | extern int panic_on_oops; |
428 | 428 | extern int panic_on_unrecovered_nmi; |
429 | 429 | extern int panic_on_io_nmi; |
430 | +extern int panic_on_warn; | |
430 | 431 | extern int sysctl_panic_on_stackoverflow; |
431 | 432 | /* |
432 | 433 | * Only to be used by arch init code. If the user over-wrote the default |
include/uapi/linux/sysctl.h
... | ... | @@ -153,6 +153,7 @@ |
153 | 153 | KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */ |
154 | 154 | KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ |
155 | 155 | KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ |
156 | + KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */ | |
156 | 157 | }; |
157 | 158 | |
158 | 159 |
kernel/panic.c
... | ... | @@ -33,6 +33,7 @@ |
33 | 33 | static int pause_on_oops_flag; |
34 | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
35 | 35 | static bool crash_kexec_post_notifiers; |
36 | +int panic_on_warn __read_mostly; | |
36 | 37 | |
37 | 38 | int panic_timeout = CONFIG_PANIC_TIMEOUT; |
38 | 39 | EXPORT_SYMBOL_GPL(panic_timeout); |
... | ... | @@ -428,6 +429,17 @@ |
428 | 429 | if (args) |
429 | 430 | vprintk(args->fmt, args->args); |
430 | 431 | |
432 | + if (panic_on_warn) { | |
433 | + /* | |
434 | + * This thread may hit another WARN() in the panic path. | |
435 | + * Resetting this prevents additional WARN() from panicking the | |
436 | + * system on this thread. Other threads are blocked by the | |
437 | + * panic_mutex in panic(). | |
438 | + */ | |
439 | + panic_on_warn = 0; | |
440 | + panic("panic_on_warn set ...\n"); | |
441 | + } | |
442 | + | |
431 | 443 | print_modules(); |
432 | 444 | dump_stack(); |
433 | 445 | print_oops_end_marker(); |
... | ... | @@ -485,6 +497,7 @@ |
485 | 497 | |
486 | 498 | core_param(panic, panic_timeout, int, 0644); |
487 | 499 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
500 | +core_param(panic_on_warn, panic_on_warn, int, 0644); | |
488 | 501 | |
489 | 502 | static int __init setup_crash_kexec_post_notifiers(char *s) |
490 | 503 | { |
kernel/sysctl.c
... | ... | @@ -1104,6 +1104,15 @@ |
1104 | 1104 | .proc_handler = proc_dointvec, |
1105 | 1105 | }, |
1106 | 1106 | #endif |
1107 | + { | |
1108 | + .procname = "panic_on_warn", | |
1109 | + .data = &panic_on_warn, | |
1110 | + .maxlen = sizeof(int), | |
1111 | + .mode = 0644, | |
1112 | + .proc_handler = proc_dointvec_minmax, | |
1113 | + .extra1 = &zero, | |
1114 | + .extra2 = &one, | |
1115 | + }, | |
1107 | 1116 | { } |
1108 | 1117 | }; |
1109 | 1118 |
kernel/sysctl_binary.c