kernel/softlockup.c

/*
 * Detect Soft Lockups
 *
 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
 *
 * this code detects soft lockups: incidents where the kernel does not
 * reschedule on a CPU for softlockup_thresh seconds (60 by default) or more.
 */
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <asm/irq_regs.h>

static DEFINE_SPINLOCK(print_lock);

static DEFINE_PER_CPU(unsigned long, touch_timestamp);
static DEFINE_PER_CPU(unsigned long, print_timestamp);
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);

static int __read_mostly did_panic;
int __read_mostly softlockup_thresh = 60;

  /*
   * Should we panic (and reboot, if panic_timeout= is set) when a
   * soft-lockup occurs:
   */
  unsigned int __read_mostly softlockup_panic =
  				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  static int __init softlockup_panic_setup(char *str)
  {
  	softlockup_panic = simple_strtoul(str, NULL, 0);
  
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
  static int
  softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
  {
  	did_panic = 1;
  
  	return NOTIFY_DONE;
  }
  
  static struct notifier_block panic_block = {
  	.notifier_call = softlock_panic,
  };
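/*
 * The panic notifier only records that a panic happened; softlockup_tick()
 * and check_hung_uninterruptible_tasks() test did_panic so that no further
 * lockup reports are generated once the system is already going down.
 */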
/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(int this_cpu)
{
	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
}
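/*
 * Worked example of the shift approximation: a cpu_clock() value of
 * 5,000,000,000 ns (5 s) shifted right by 30 yields 4, since each counted
 * unit is 2^30 ns = 1.074 s.  That error is negligible against thresholds
 * measured in tens of seconds.
 */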
static void __touch_softlockup_watchdog(void)
{
	int this_cpu = raw_smp_processor_id();

	__raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
}

void touch_softlockup_watchdog(void)
{
	__raw_get_cpu_var(touch_timestamp) = 0;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
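/*
 * Note the two variants: __touch_softlockup_watchdog() reads the clock and
 * stores a fresh timestamp, while the exported touch_softlockup_watchdog()
 * just stores 0 and lets the next softlockup_tick() take the timestamp
 * itself.  Storing 0 avoids calling cpu_clock() from awkward contexts such
 * as the NMI watchdog path.
 */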
  void touch_all_softlockup_watchdogs(void)
  {
  	int cpu;
  
  	/* Cause each CPU to re-update its timestamp rather than complain */
  	for_each_online_cpu(cpu)
  		per_cpu(touch_timestamp, cpu) = 0;
  }
  EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
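/*
 * touch_all_softlockup_watchdogs() is the big hammer for events that stall
 * every CPU legitimately, for example a SysRq dump of all tasks to a slow
 * serial console; each CPU then re-initializes its own stamp on its next
 * softlockup_tick().
 */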
  int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
  			     struct file *filp, void __user *buffer,
  			     size_t *lenp, loff_t *ppos)
  {
  	touch_all_softlockup_watchdogs();
  	return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
  }
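/*
 * proc_dosoftlockup_thresh() backs the /proc/sys/kernel/softlockup_thresh
 * sysctl.  Touching every watchdog before the new value is stored prevents
 * a spurious lockup report when the threshold is lowered while some CPUs
 * still carry an old touch_timestamp.
 */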
  /*
   * This callback runs from the timer interrupt, and checks
   * whether the watchdog thread has hung or not:
   */
void softlockup_tick(void)
{
	int this_cpu = smp_processor_id();
	unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
	unsigned long print_timestamp;
	struct pt_regs *regs = get_irq_regs();
	unsigned long now;

	/* Is detection switched off? */
	if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
		/* Be sure we don't false trigger if switched back on */
		if (touch_timestamp)
			per_cpu(touch_timestamp, this_cpu) = 0;
		return;
	}

	if (touch_timestamp == 0) {
		__touch_softlockup_watchdog();
		return;
	}
  
  	print_timestamp = per_cpu(print_timestamp, this_cpu);
  
  	/* report at most once a second */
	if (print_timestamp == touch_timestamp || did_panic)
		return;

	/* do not print during early bootup: */
	if (unlikely(system_state != SYSTEM_RUNNING)) {
		__touch_softlockup_watchdog();
		return;
	}

	now = get_timestamp(this_cpu);

	/*
	 * Wake up the high-prio watchdog task twice per
	 * threshold timespan.
	 */
	if (now > touch_timestamp + softlockup_thresh/2)
		wake_up_process(per_cpu(watchdog_task, this_cpu));

	/* Warn about unreasonable delays: */
	if (now <= (touch_timestamp + softlockup_thresh))
		return;

	per_cpu(print_timestamp, this_cpu) = touch_timestamp;

	spin_lock(&print_lock);
	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
			this_cpu, now - touch_timestamp,
			current->comm, task_pid_nr(current));
	print_modules();
	print_irqtrace_events(current);
	if (regs)
		show_regs(regs);
	else
		dump_stack();
	spin_unlock(&print_lock);

	if (softlockup_panic)
		panic("softlockup: hung tasks");
}
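/*
 * Timing summary: the per-CPU watchdog thread refreshes touch_timestamp
 * roughly once a second.  softlockup_tick(), called from the timer
 * interrupt, wakes the watchdog once the stamp is older than
 * softlockup_thresh/2 and prints the "BUG: soft lockup" report once it is
 * older than softlockup_thresh, at most once per stamp value.
 */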
  
/*
 * Have a reasonable limit on the number of tasks checked:
 */
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;

unsigned long __read_mostly sysctl_hung_task_warnings = 10;

/*
 * Only do the hung-tasks check on one CPU:
 */
static int check_cpu __read_mostly = -1;
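/*
 * The sysctl_hung_task_* knobs above are exposed through kernel/sysctl.c as
 * /proc/sys/kernel/hung_task_check_count, hung_task_timeout_secs and
 * hung_task_warnings; writing 0 to hung_task_timeout_secs disables the
 * hung-task scan entirely.
 */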
  
static void check_hung_task(struct task_struct *t, unsigned long now)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	if (t->flags & PF_FROZEN)
		return;

	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
		t->last_switch_count = switch_count;
		t->last_switch_timestamp = now;
		return;
	}
	if ((long)(now - t->last_switch_timestamp) <
					sysctl_hung_task_timeout_secs)
		return;
	if (!sysctl_hung_task_warnings)
		return;
	sysctl_hung_task_warnings--;

	/*
	 * Ok, the task did not get scheduled for more than
	 * sysctl_hung_task_timeout_secs (480 seconds by default), complain:
	 */
	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
			"%ld seconds.\n", t->comm, t->pid,
			sysctl_hung_task_timeout_secs);
	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
	sched_show_task(t);
	__debug_show_held_locks(t);

	t->last_switch_timestamp = now;
	touch_nmi_watchdog();

	if (softlockup_panic)
		panic("softlockup: blocked tasks");
}
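/*
 * check_hung_task() keys off the context-switch counters: a task whose
 * nvcsw + nivcsw sum has not changed since the previous scan, and whose
 * recorded switch time is older than sysctl_hung_task_timeout_secs, gets
 * reported.  Frozen tasks are skipped and at most
 * sysctl_hung_task_warnings reports are ever printed.
 */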
  
/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (sysctl_hung_task_timeout_secs, 480 seconds by
 * default). If that happens, print out a warning.
 */
static void check_hung_uninterruptible_tasks(int this_cpu)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long now = get_timestamp(this_cpu);
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		if (!--max_count)
			goto unlock;
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, now);
	} while_each_thread(g, t);
 unlock:
	read_unlock(&tasklist_lock);
}
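/*
 * The scan runs under read_lock(&tasklist_lock), so it is capped at
 * sysctl_hung_task_check_count threads per pass to keep the lock hold time
 * reasonable on systems with very large thread counts.
 */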
  
/*
 * The watchdog thread - runs every second and touches the timestamp.
 */
static int watchdog(void *__bind_cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	int this_cpu = (long)__bind_cpu;

	sched_setscheduler(current, SCHED_FIFO, &param);

	/* initialize timestamp */
	__touch_softlockup_watchdog();

	set_current_state(TASK_INTERRUPTIBLE);
	/*
	 * Run briefly once per second to reset the softlockup timestamp.
	 * If this gets delayed for more than 60 seconds then the
	 * debug-printout triggers in softlockup_tick().
	 */
	while (!kthread_should_stop()) {
		__touch_softlockup_watchdog();
		schedule();

		if (kthread_should_stop())
			break;

		if (this_cpu == check_cpu) {
			if (sysctl_hung_task_timeout_secs)
				check_hung_uninterruptible_tasks(this_cpu);
		}

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}
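/*
 * Note the sleep pattern above: the thread marks itself TASK_INTERRUPTIBLE
 * before the loop calls schedule() again, and softlockup_tick() wakes it
 * with wake_up_process() once the stamp is older than half the threshold,
 * so a wakeup racing with the state change simply makes the next
 * schedule() return immediately rather than being lost.
 */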
  
  /*
   * Create/destroy watchdog threads as CPUs come and go:
   */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		BUG_ON(per_cpu(watchdog_task, hotcpu));
		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
		if (IS_ERR(p)) {
			printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		per_cpu(touch_timestamp, hotcpu) = 0;
		per_cpu(watchdog_task, hotcpu) = p;
		kthread_bind(p, hotcpu);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		check_cpu = cpumask_any(cpu_online_mask);
		wake_up_process(per_cpu(watchdog_task, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		if (hotcpu == check_cpu) {
			/* Pick any other online cpu. */
			check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
		}
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(watchdog_task, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(watchdog_task, hotcpu),
			     cpumask_any(cpu_online_mask));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		p = per_cpu(watchdog_task, hotcpu);
		per_cpu(watchdog_task, hotcpu) = NULL;
		kthread_stop(p);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};
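/*
 * Hotplug lifecycle: CPU_UP_PREPARE creates and binds the per-CPU
 * "watchdog/N" thread, CPU_ONLINE wakes it and re-elects which CPU runs
 * the hung-task scan, CPU_DOWN_PREPARE moves that scan elsewhere, and
 * CPU_UP_CANCELED/CPU_DEAD unbind and stop the thread again.
 */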
  static int __initdata nosoftlockup;
  
  static int __init nosoftlockup_setup(char *str)
  {
  	nosoftlockup = 1;
  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);
  
  static int __init spawn_softlockup_task(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err;

	if (nosoftlockup)
		return 0;

	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
	if (err == NOTIFY_BAD) {
		BUG();
		return 1;
	}
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	return 0;
}
early_initcall(spawn_softlockup_task);
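/*
 * Boot flow: early_initcall() runs spawn_softlockup_task() on the boot CPU;
 * unless "nosoftlockup" was given on the command line it creates and starts
 * the boot CPU's watchdog thread, then registers the CPU-hotplug and panic
 * notifiers so later CPUs and panics are handled the same way.
 */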