Commit c4f3b63fe15b4629aa1ec163c95ab30423d0f76a

Authored by Ravikiran G Thirumalai
Committed by Linus Torvalds
1 parent a5f2ce3c60

softlockup: add a /proc tuning parameter

Make the trigger limit for softlockup warnings tunable.  This is useful
for debugging softlockups: lowering softlockup_thresh makes the kernel
report possible softlockups earlier.

This patch:
1. Adds a sysctl softlockup_thresh with valid values of 1-60s
   (a higher value can be used to suppress false positives)
2. Changes the softlockup printk to print the cpu softlockup time

[akpm@linux-foundation.org: Fix various warnings and add definition of "two"]
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Showing 4 changed files with 40 additions and 9 deletions Side-by-side Diff

Documentation/sysctl/kernel.txt
... ... @@ -320,6 +320,14 @@
320 320  
321 321 ==============================================================
322 322  
  323 +softlockup_thresh:
  324 +
  325 +This value can be used to adjust the softlockup tolerance
  326 +threshold. The default threshold is 10s: if a cpu is locked up
  327 +for more than 10s, the kernel complains. Valid values are 1-60s.
  328 +
  329 +==============================================================
  330 +
323 331 tainted:
324 332  
325 333 Non-zero if the kernel has been tainted. Numeric values, which
include/linux/sched.h
... ... @@ -261,6 +261,7 @@
261 261 extern void spawn_softlockup_task(void);
262 262 extern void touch_softlockup_watchdog(void);
263 263 extern void touch_all_softlockup_watchdogs(void);
  264 +extern int softlockup_thresh;
264 265 #else
265 266 static inline void softlockup_tick(void)
266 267 {
kernel/softlockup.c
... ... @@ -24,6 +24,7 @@
24 24 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
25 25  
26 26 static int did_panic;
  27 +int softlockup_thresh = 10;
27 28  
28 29 static int
29 30 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
30 31  
... ... @@ -104,13 +105,15 @@
104 105 wake_up_process(per_cpu(watchdog_task, this_cpu));
105 106  
106 107 /* Warn about delays longer than softlockup_thresh seconds: */
107   - if (now <= (touch_timestamp + 10))
  108 + if (now <= (touch_timestamp + softlockup_thresh))
108 109 return;
109 110  
110 111 per_cpu(print_timestamp, this_cpu) = touch_timestamp;
111 112  
112 113 spin_lock(&print_lock);
113   - printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", this_cpu);
  114 + printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
  115 + this_cpu, now - touch_timestamp,
  116 + current->comm, current->pid);
114 117 if (regs)
115 118 show_regs(regs);
116 119 else
kernel/sysctl.c
... ... @@ -80,6 +80,19 @@
80 80 extern int sysctl_stat_interval;
81 81 extern int audit_argv_kb;
82 82  
  83 +/* Constants used for minimum and maximum */
  84 +#ifdef CONFIG_DETECT_SOFTLOCKUP
  85 +static int one = 1;
  86 +static int sixty = 60;
  87 +#endif
  88 +
  89 +#ifdef CONFIG_MMU
  90 +static int two = 2;
  91 +#endif
  92 +
  93 +static int zero;
  94 +static int one_hundred = 100;
  95 +
83 96 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
84 97 static int maxolduid = 65535;
85 98 static int minolduid;
... ... @@ -711,6 +724,19 @@
711 724 .proc_handler = &proc_dointvec,
712 725 },
713 726 #endif
  727 +#ifdef CONFIG_DETECT_SOFTLOCKUP
  728 + {
  729 + .ctl_name = CTL_UNNUMBERED,
  730 + .procname = "softlockup_thresh",
  731 + .data = &softlockup_thresh,
  732 + .maxlen = sizeof(int),
  733 + .mode = 0644,
  734 + .proc_handler = &proc_dointvec_minmax,
  735 + .strategy = &sysctl_intvec,
  736 + .extra1 = &one,
  737 + .extra2 = &sixty,
  738 + },
  739 +#endif
714 740 #ifdef CONFIG_COMPAT
715 741 {
716 742 .ctl_name = KERN_COMPAT_LOG,
... ... @@ -756,13 +782,6 @@
756 782 */
757 783 { .ctl_name = 0 }
758 784 };
759   -
760   -/* Constants for minimum and maximum testing in vm_table.
761   - We use these as one-element integer vectors. */
762   -static int zero;
763   -static int two = 2;
764   -static int one_hundred = 100;
765   -
766 785  
767 786 static ctl_table vm_table[] = {
768 787 {