Commit 9c44bc03fff44ff04237a7d92e35304a0e50c331
Committed by
Thomas Gleixner
1 parent
75d3bce2fc
Exists in
master
and in
20 other branches
softlockup: allow panic on lockup
Allow users to configure the softlockup detector to generate a panic instead of a warning message. High-availability systems might opt for this strict method (combined with the panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. Also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. It is disabled by default. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Showing 5 changed files with 62 additions and 2 deletions Side-by-side Diff
Documentation/kernel-parameters.txt
include/linux/sched.h
... | ... | @@ -294,7 +294,8 @@ |
294 | 294 | extern void spawn_softlockup_task(void); |
295 | 295 | extern void touch_softlockup_watchdog(void); |
296 | 296 | extern void touch_all_softlockup_watchdogs(void); |
297 | -extern unsigned long softlockup_thresh; | |
297 | +extern unsigned int softlockup_panic; | |
298 | +extern unsigned long softlockup_thresh; | |
298 | 299 | extern unsigned long sysctl_hung_task_check_count; |
299 | 300 | extern unsigned long sysctl_hung_task_timeout_secs; |
300 | 301 | extern unsigned long sysctl_hung_task_warnings; |
kernel/softlockup.c
... | ... | @@ -27,6 +27,21 @@ |
27 | 27 | static int __read_mostly did_panic; |
28 | 28 | unsigned long __read_mostly softlockup_thresh = 60; |
29 | 29 | |
30 | +/* | |
31 | + * Should we panic (and reboot, if panic_timeout= is set) when a | |
32 | + * soft-lockup occurs: | |
33 | + */ | |
34 | +unsigned int __read_mostly softlockup_panic = | |
35 | + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; | |
36 | + | |
37 | +static int __init softlockup_panic_setup(char *str) /* handler registered for the "softlockup_panic=" boot option */ | |
38 | +{ | |
39 | + softlockup_panic = simple_strtoul(str, NULL, 0); /* stored as parsed, no range check here; any non-zero value makes the `if (softlockup_panic)` checks panic */ | |
40 | + | |
41 | + return 1; | |
42 | +} | |
43 | +__setup("softlockup_panic=", softlockup_panic_setup); | |
44 | + | |
30 | 45 | static int |
31 | 46 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) |
32 | 47 | { |
... | ... | @@ -120,6 +135,9 @@ |
120 | 135 | else |
121 | 136 | dump_stack(); |
122 | 137 | spin_unlock(&print_lock); |
138 | + | |
139 | + if (softlockup_panic) | |
140 | + panic("softlockup: hung tasks"); | |
123 | 141 | } |
124 | 142 | |
125 | 143 | /* |
... | ... | @@ -172,6 +190,9 @@ |
172 | 190 | |
173 | 191 | t->last_switch_timestamp = now; |
174 | 192 | touch_nmi_watchdog(); |
193 | + | |
194 | + if (softlockup_panic) | |
195 | + panic("softlockup: blocked tasks"); | |
175 | 196 | } |
176 | 197 | |
177 | 198 | /* |
kernel/sysctl.c
... | ... | @@ -729,6 +729,17 @@ |
729 | 729 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
730 | 730 | { |
731 | 731 | .ctl_name = CTL_UNNUMBERED, |
732 | + .procname = "softlockup_panic", | |
733 | + .data = &softlockup_panic, | |
734 | + .maxlen = sizeof(int), | |
735 | + .mode = 0644, | |
736 | + .proc_handler = &proc_dointvec_minmax, /* int handler: .data is an unsigned int with .maxlen = sizeof(int), so the unsigned-long handler would mis-size it */ | |
737 | + .strategy = &sysctl_intvec, | |
738 | + .extra1 = &zero, | |
739 | + .extra2 = &one, | |
740 | + }, | |
741 | + { | |
742 | + .ctl_name = CTL_UNNUMBERED, | |
732 | 743 | .procname = "softlockup_thresh", |
733 | 744 | .data = &softlockup_thresh, |
734 | 745 | .maxlen = sizeof(unsigned long), |
lib/Kconfig.debug
... | ... | @@ -147,7 +147,7 @@ |
147 | 147 | help |
148 | 148 | Say Y here to enable the kernel to detect "soft lockups", |
149 | 149 | which are bugs that cause the kernel to loop in kernel |
150 | - mode for more than 10 seconds, without giving other tasks a | |
150 | + mode for more than 60 seconds, without giving other tasks a | |
151 | 151 | chance to run. |
152 | 152 | |
153 | 153 | When a soft-lockup is detected, the kernel will print the |
... | ... | @@ -158,6 +158,30 @@ |
158 | 158 | (Note that "hard lockups" are a separate type of bug that |
159 | 159 | can be detected via the NMI-watchdog, on platforms that |
160 | 160 | support it.) |
161 | + | |
162 | +config BOOTPARAM_SOFTLOCKUP_PANIC | |
163 | + bool "Panic (Reboot) On Soft Lockups" | |
164 | + depends on DETECT_SOFTLOCKUP | |
165 | + help | |
166 | + Say Y here to enable the kernel to panic on "soft lockups", | |
167 | + which are bugs that cause the kernel to loop in kernel | |
168 | + mode for more than 60 seconds, without giving other tasks a | |
169 | + chance to run. | |
170 | + | |
171 | + The panic can be used in combination with panic_timeout, | |
172 | + to cause the system to reboot automatically after a | |
173 | + lockup has been detected. This feature is useful for | |
174 | + high-availability systems that have uptime guarantees and | |
175 | + where a lockup must be resolved ASAP. | |
176 | + | |
177 | + Say N if unsure. | |
178 | + | |
179 | +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE | |
180 | + int | |
181 | + depends on DETECT_SOFTLOCKUP | |
182 | + range 0 1 | |
183 | + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC | |
184 | + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC | |
161 | 185 | |
162 | 186 | config SCHED_DEBUG |
163 | 187 | bool "Collect scheduler debugging info" |