Commit 9c44bc03fff44ff04237a7d92e35304a0e50c331

Authored by Ingo Molnar
Committed by Thomas Gleixner
1 parent 75d3bce2fc

softlockup: allow panic on lockup

Allow users to configure the softlockup detector to generate a panic
instead of a warning message.

High-availability systems might opt for this strict method (combined
with the panic_timeout= boot option/sysctl), instead of generating
softlockup warnings ad infinitum.

Also, automated tests work better if the system reboots reliably (into
a safe kernel) in case of a lockup.

The full spectrum of configurability is supported: boot option, sysctl
option and Kconfig option.

It is disabled by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Showing 5 changed files with 62 additions and 2 deletions Side-by-side Diff

Documentation/kernel-parameters.txt
... ... @@ -1971,6 +1971,9 @@
1971 1971  
1972 1972 snd-ymfpci= [HW,ALSA]
1973 1973  
  1974 + softlockup_panic=
  1975 + [KNL] Should the soft-lockup detector generate panics (0: warn only, non-zero: panic).
  1976 +
1974 1977 sonypi.*= [HW] Sony Programmable I/O Control Device driver
1975 1978 See Documentation/sonypi.txt
1976 1979  
include/linux/sched.h
... ... @@ -294,7 +294,8 @@
294 294 extern void spawn_softlockup_task(void);
295 295 extern void touch_softlockup_watchdog(void);
296 296 extern void touch_all_softlockup_watchdogs(void);
297   -extern unsigned long softlockup_thresh;
  297 +extern unsigned int softlockup_panic;
  298 +extern unsigned long softlockup_thresh;
298 299 extern unsigned long sysctl_hung_task_check_count;
299 300 extern unsigned long sysctl_hung_task_timeout_secs;
300 301 extern unsigned long sysctl_hung_task_warnings;
... ... @@ -27,6 +27,21 @@
27 27 static int __read_mostly did_panic;
28 28 unsigned long __read_mostly softlockup_thresh = 60;
29 29  
  30 +/*
  31 + * Non-zero: panic (and reboot, if panic_timeout= is set) when a
  32 + * soft-lockup occurs. Kconfig default; "softlockup_panic=" overrides it.
  33 + */
  34 +unsigned int __read_mostly softlockup_panic =
  35 + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  36 +
  37 +static int __init softlockup_panic_setup(char *str) /* parses the "softlockup_panic=" boot option */
  38 +{
  39 + softlockup_panic = simple_strtoul(str, NULL, 0); /* stored as parsed: not clamped to 0/1 here */
  40 +
  41 + return 1; /* tell the __setup() machinery the option was handled */
  42 +}
  43 +__setup("softlockup_panic=", softlockup_panic_setup);
  44 +
30 45 static int
31 46 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
32 47 {
... ... @@ -120,6 +135,9 @@
120 135 else
121 136 dump_stack();
122 137 spin_unlock(&print_lock);
  138 +
  139 + if (softlockup_panic)
  140 + panic("softlockup: hung tasks");
123 141 }
124 142  
125 143 /*
... ... @@ -172,6 +190,9 @@
172 190  
173 191 t->last_switch_timestamp = now;
174 192 touch_nmi_watchdog();
  193 +
  194 + if (softlockup_panic)
  195 + panic("softlockup: blocked tasks");
175 196 }
176 197  
177 198 /*
... ... @@ -729,6 +729,17 @@
729 729 #ifdef CONFIG_DETECT_SOFTLOCKUP
730 730 {
731 731 .ctl_name = CTL_UNNUMBERED,
  732 + .procname = "softlockup_panic", /* exposes the softlockup_panic flag as a sysctl */
  733 + .data = &softlockup_panic,
  734 + .maxlen = sizeof(int),
  735 + .mode = 0644,
  736 + .proc_handler = &proc_dointvec_minmax, /* data is int-sized: must use the int handler, not the ulong one */
  737 + .strategy = &sysctl_intvec,
  738 + .extra1 = &zero, /* lower bound enforced by the minmax handler */
  739 + .extra2 = &one, /* upper bound enforced by the minmax handler */
  740 + },
  741 + {
  742 + .ctl_name = CTL_UNNUMBERED,
732 743 .procname = "softlockup_thresh",
733 744 .data = &softlockup_thresh,
734 745 .maxlen = sizeof(unsigned long),
... ... @@ -147,7 +147,7 @@
147 147 help
148 148 Say Y here to enable the kernel to detect "soft lockups",
149 149 which are bugs that cause the kernel to loop in kernel
150   - mode for more than 10 seconds, without giving other tasks a
  150 + mode for more than 60 seconds, without giving other tasks a
151 151 chance to run.
152 152  
153 153 When a soft-lockup is detected, the kernel will print the
... ... @@ -158,6 +158,30 @@
158 158 (Note that "hard lockups" are a separate type of bug that
159 159 can be detected via the NMI-watchdog, on platforms that
160 160 support it.)
  161 +
  162 +config BOOTPARAM_SOFTLOCKUP_PANIC
  163 + bool "Panic (Reboot) On Soft Lockups"
  164 + depends on DETECT_SOFTLOCKUP
  165 + help
  166 + Say Y here to enable the kernel to panic on "soft lockups",
  167 + which are bugs that cause the kernel to loop in kernel
  168 + mode for more than 60 seconds, without giving other tasks a
  169 + chance to run.
  170 +
  171 + The panic can be used in combination with panic_timeout,
  172 + to cause the system to reboot automatically after a
  173 + lockup has been detected. This feature is useful for
  174 + high-availability systems that have uptime guarantees and
  175 + where a lockup must be resolved ASAP.
  176 +
  177 + Say N if unsure.
  178 +
  179 +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
  180 + int # no prompt: value is derived from BOOTPARAM_SOFTLOCKUP_PANIC below
  181 + depends on DETECT_SOFTLOCKUP
  182 + range 0 1
  183 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
  184 + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
161 185  
162 186 config SCHED_DEBUG
163 187 bool "Collect scheduler debugging info"