Commit 82a1fcb90287052aabfa235e7ffc693ea003fe69

Author: Ingo Molnar
Parent: d0d23b5432

softlockup: automatically detect hung TASK_UNINTERRUPTIBLE tasks

this patch extends the soft-lockup detector to automatically
detect hung TASK_UNINTERRUPTIBLE tasks. Such hung tasks are
reported the following way:

 ------------------>
 INFO: task prctl:3042 blocked for more than 120 seconds.
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message
 prctl         D fd5e3793     0  3042   2997
        f6050f38 00000046 00000001 fd5e3793 00000009 c06d8264 c06dae80 00000286
        f6050f40 f6050f00 f7d34d90 f7d34fc8 c1e1be80 00000001 f6050000 00000000
        f7e92d00 00000286 f6050f18 c0489d1a f6050f40 00006605 00000000 c0133a5b
 Call Trace:
  [<c04883a5>] schedule_timeout+0x6d/0x8b
  [<c04883d8>] schedule_timeout_uninterruptible+0x15/0x17
  [<c0133a76>] msleep+0x10/0x16
  [<c0138974>] sys_prctl+0x30/0x1e2
  [<c0104c52>] sysenter_past_esp+0x5f/0xa5
  =======================
 2 locks held by prctl/3042:
 #0:  (&sb->s_type->i_mutex_key#5){--..}, at: [<c0197d11>] do_fsync+0x38/0x7a
 #1:  (jbd_handle){--..}, at: [<c01ca3d2>] journal_start+0xc7/0xe9
 <------------------

the current default timeout is 120 seconds. Such messages are printed
up to 10 times per bootup. If the system has crashed already then the
messages are not printed.
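
as an illustration only (not part of this patch), here is a minimal
userspace sketch that adjusts the new tunables through the
/proc/sys/kernel files added by this patch; the write_sysctl() helper
is made up for the example:

/* Hypothetical helper: tune the hung-task detector from userspace.
 * The three /proc files are the sysctl entries added by this patch. */
#include <stdio.h>

static int write_sysctl(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;	/* no such sysctl, or no permission */
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

int main(void)
{
	/* 0 disables the check entirely; the default is 120 seconds: */
	write_sysctl("/proc/sys/kernel/hung_task_timeout_secs", 120);
	/* upper bound on tasks scanned per check (default 1024): */
	write_sysctl("/proc/sys/kernel/hung_task_check_count", 1024);
	/* warnings printed before going quiet (default 10): */
	write_sysctl("/proc/sys/kernel/hung_task_warnings", 10);
	return 0;
}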

if lockdep is enabled then all held locks are printed as well.

this feature is a natural extension to the softlockup-detector (kernel
locked up without scheduling) and to the NMI watchdog (kernel locked up
with IRQs disabled).

[ Gautham R Shenoy <ego@in.ibm.com>: CPU hotplug fixes. ]
[ Andrew Morton <akpm@linux-foundation.org>: build warning fix. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>

Showing 7 changed files with 164 additions and 13 deletions

include/linux/debug_locks.h
@@ -47,11 +47,16 @@
 
 #ifdef CONFIG_LOCKDEP
 extern void debug_show_all_locks(void);
+extern void __debug_show_held_locks(struct task_struct *task);
 extern void debug_show_held_locks(struct task_struct *task);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
 extern void debug_check_no_locks_held(struct task_struct *task);
 #else
 static inline void debug_show_all_locks(void)
+{
+}
+
+static inline void __debug_show_held_locks(struct task_struct *task)
 {
 }
 
include/linux/sched.h
@@ -258,12 +258,17 @@
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
+extern void sched_show_task(struct task_struct *p);
+
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern long sysctl_hung_task_warnings;
 #else
 static inline void softlockup_tick(void)
 {
@@ -1040,6 +1045,11 @@
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
+#endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+	unsigned long last_switch_timestamp;
+	unsigned long last_switch_count;
 #endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
kernel/fork.c
@@ -1059,6 +1059,11 @@
 	p->prev_utime = cputime_zero;
 	p->prev_stime = cputime_zero;
 
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+	p->last_switch_count = 0;
+	p->last_switch_timestamp = 0;
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 	p->rchar = 0;		/* I/O counter: bytes read */
 	p->wchar = 0;		/* I/O counter: bytes written */
kernel/lockdep.c
@@ -3206,13 +3206,23 @@
 
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
-void debug_show_held_locks(struct task_struct *task)
+/*
+ * Careful: only use this function if you are sure that
+ * the task cannot run in parallel!
+ */
+void __debug_show_held_locks(struct task_struct *task)
 {
 	if (unlikely(!debug_locks)) {
 		printk("INFO: lockdep is turned off.\n");
 		return;
 	}
 	lockdep_print_held_locks(task);
+}
+EXPORT_SYMBOL_GPL(__debug_show_held_locks);
+
+void debug_show_held_locks(struct task_struct *task)
+{
+	__debug_show_held_locks(task);
 }
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
kernel/sched.c
@@ -4945,7 +4945,7 @@
 
 static const char stat_nam[] = "RSDTtZX";
 
-static void show_task(struct task_struct *p)
+void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
 	unsigned state;
@@ -4998,7 +4998,7 @@
 	 */
 		touch_nmi_watchdog();
 		if (!state_filter || (p->state & state_filter))
-			show_task(p);
+			sched_show_task(p);
 	} while_each_thread(g, p);
 
 	touch_all_softlockup_watchdogs();
kernel/softlockup.c
@@ -8,6 +8,7 @@
  */
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
@@ -24,7 +25,7 @@
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 
 static int did_panic;
-int softlockup_thresh = 10;
+int softlockup_thresh = 60;
 
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -45,7 +46,7 @@
  */
 static unsigned long get_timestamp(int this_cpu)
 {
-	return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */
+	return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
 }
 
 void touch_softlockup_watchdog(void)
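
a quick standalone check of the ">> 30" conversion above (illustration
only, not kernel code): 2^30 = 1073741824 is close to 10^9, so shifting
a nanosecond clock right by 30 yields units of roughly 1.074 seconds:

#include <stdio.h>

int main(void)
{
	unsigned long long ns = 120ULL * 1000 * 1000 * 1000; /* 120 s in ns */

	/* ns >> 30 divides by 2^30 (~10^9): 120 s maps to ~111 units,
	 * so the timeout comparison is accurate to within about 7%. */
	printf("%llu\n", ns >> 30);
	return 0;
}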
@@ -100,11 +101,7 @@
 
 	now = get_timestamp(this_cpu);
 
-	/* Wake up the high-prio watchdog task every second: */
-	if (now > (touch_timestamp + 1))
-		wake_up_process(per_cpu(watchdog_task, this_cpu));
-
-	/* Warn about unreasonable 10+ seconds delays: */
+	/* Warn about unreasonable delays: */
 	if (now <= (touch_timestamp + softlockup_thresh))
 		return;
 
@@ -122,11 +119,93 @@
 }
 
 /*
+ * Have a reasonable limit on the number of tasks checked:
+ */
+unsigned long sysctl_hung_task_check_count = 1024;
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long sysctl_hung_task_timeout_secs = 120;
+
+long sysctl_hung_task_warnings = 10;
+
+/*
+ * Only do the hung-tasks check on one CPU:
+ */
+static int check_cpu __read_mostly = -1;
+
+static void check_hung_task(struct task_struct *t, unsigned long now)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	if (t->flags & PF_FROZEN)
+		return;
+
+	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
+		t->last_switch_count = switch_count;
+		t->last_switch_timestamp = now;
+		return;
+	}
+	if ((long)(now - t->last_switch_timestamp) <
+				sysctl_hung_task_timeout_secs)
+		return;
+	if (sysctl_hung_task_warnings < 0)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than 2 minutes,
+	 * complain:
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%ld seconds.\n", t->comm, t->pid,
+			sysctl_hung_task_timeout_secs);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	t->last_switch_timestamp = now;
+	touch_nmi_watchdog();
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(int this_cpu)
+{
+	int max_count = sysctl_hung_task_check_count;
+	unsigned long now = get_timestamp(this_cpu);
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if ((tainted & TAINT_DIE) || did_panic)
+		return;
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, t) {
+		if (!--max_count)
+			break;
+		if (t->state & TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, now);
+	} while_each_thread(g, t);
+
+	read_unlock(&tasklist_lock);
+}
+
+/*
  * The watchdog thread - runs every second and touches the timestamp.
  */
 static int watchdog(void *__bind_cpu)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	int this_cpu = (long)__bind_cpu;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
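
the same detection idea can be mirrored from userspace for a single
task, using the voluntary_ctxt_switches/nonvoluntary_ctxt_switches
fields of /proc/PID/status; a rough sketch under those assumptions,
illustration only (the in-kernel detector walks the whole task list
under tasklist_lock):

/* Rough userspace analogue of check_hung_task() for a single PID:
 * watch /proc/PID/status and warn if the task stays in D state with
 * an unchanged context-switch count for the whole timeout. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>

static int read_status(pid_t pid, char *state, unsigned long *switches)
{
	char path[64], line[256];
	unsigned long v = 0, nv = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/status", (int)pid);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		sscanf(line, "State: %c", state);
		sscanf(line, "voluntary_ctxt_switches: %lu", &v);
		sscanf(line, "nonvoluntary_ctxt_switches: %lu", &nv);
	}
	fclose(f);
	*switches = v + nv;		/* same idea as t->nvcsw + t->nivcsw */
	return 0;
}

int main(int argc, char **argv)
{
	unsigned long last = (unsigned long)-1; /* force first-pass mismatch */
	unsigned long count, blocked = 0;
	char state = '?';
	pid_t pid;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	pid = (pid_t)atoi(argv[1]);

	for (;;) {
		if (read_status(pid, &state, &count))
			return 1;	/* task is gone */
		if (state == 'D' && count == last)
			blocked += 10;	/* still not rescheduled */
		else
			blocked = 0;
		last = count;
		if (blocked >= 120) {	/* same default as the patch */
			printf("task %d blocked for more than 120 seconds\n",
			       (int)pid);
			blocked = 0;
		}
		sleep(10);		/* ~the watchdog's 10 s poll period */
	}
}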
@@ -135,13 +214,18 @@
 
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
-	 * If this gets delayed for more than 10 seconds then the
+	 * If this gets delayed for more than 60 seconds then the
 	 * debug-printout triggers in softlockup_tick().
 	 */
 	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		touch_softlockup_watchdog();
-		schedule();
+		msleep_interruptible(10000);
+
+		if (this_cpu != check_cpu)
+			continue;
+
+		if (sysctl_hung_task_timeout_secs)
+			check_hung_uninterruptible_tasks(this_cpu);
 	}
 
 	return 0;
@@ -171,6 +255,7 @@
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		check_cpu = any_online_cpu(cpu_online_map);
 		wake_up_process(per_cpu(watchdog_task, hotcpu));
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -181,6 +266,15 @@
 		/* Unbind so it can run. Fall thru. */
 		kthread_bind(per_cpu(watchdog_task, hotcpu),
 			     any_online_cpu(cpu_online_map));
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		if (hotcpu == check_cpu) {
+			cpumask_t temp_cpu_online_map = cpu_online_map;
+
+			cpu_clear(hotcpu, temp_cpu_online_map);
+			check_cpu = any_online_cpu(temp_cpu_online_map);
+		}
+		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		p = per_cpu(watchdog_task, hotcpu);
kernel/sysctl.c
@@ -753,6 +753,33 @@
 		.extra1		= &one,
 		.extra2		= &sixty,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_check_count",
+		.data		= &sysctl_hung_task_check_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_timeout_secs",
+		.data		= &sysctl_hung_task_timeout_secs,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hung_task_warnings",
+		.data		= &sysctl_hung_task_warnings,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+	},
 #endif
 #ifdef CONFIG_COMPAT
 {