Blame view

kernel/softlockup.c 8.86 KB
8446f1d39   Ingo Molnar   [PATCH] detect so...
1
2
3
  /*
   * Detect Soft Lockups
   *
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
4
   * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
8446f1d39   Ingo Molnar   [PATCH] detect so...
5
6
7
8
   *
   * this code detects soft lockups: incidents where, on a CPU,
   * the kernel does not reschedule for softlockup_thresh seconds
   * (60 by default) or more.
   */
8446f1d39   Ingo Molnar   [PATCH] detect so...
9
10
  #include <linux/mm.h>
  #include <linux/cpu.h>
82a1fcb90   Ingo Molnar   softlockup: autom...
11
  #include <linux/nmi.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
12
13
  #include <linux/init.h>
  #include <linux/delay.h>
831441862   Rafael J. Wysocki   Freezer: make ker...
14
  #include <linux/freezer.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
15
  #include <linux/kthread.h>
8d5be7f4e   Vegard Nossum   softlockup: show ...
16
  #include <linux/lockdep.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
17
18
  #include <linux/notifier.h>
  #include <linux/module.h>
43581a100   Ingo Molnar   softlockup: impro...
19
  #include <asm/irq_regs.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
20
  /* Held around the soft-lockup report in softlockup_tick(). */
  static DEFINE_SPINLOCK(print_lock);
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
21
22
  /*
   * touch_timestamp: per-CPU time (approximate seconds, see get_timestamp())
   * of the last watchdog touch; 0 asks softlockup_tick() to re-sample it.
   * print_timestamp: the touch_timestamp value for which a report was
   * last printed on this CPU.
   */
  static DEFINE_PER_CPU(unsigned long, touch_timestamp);
  static DEFINE_PER_CPU(unsigned long, print_timestamp);
8446f1d39   Ingo Molnar   [PATCH] detect so...
23
  /* Per-CPU watchdog kthread; NULL while no thread exists for that CPU. */
  static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
90739081e   Ingo Molnar   softlockup: fix s...
24
  /* Set to 1 by the panic notifier; suppresses further reports. */
  static int __read_mostly did_panic;
9383d9679   Dimitri Sivanich   softlockup: fix s...
25
  /* Soft-lockup threshold in seconds; a value <= 0 switches detection off. */
  int __read_mostly softlockup_thresh = 60;
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
26

9c44bc03f   Ingo Molnar   softlockup: allow...
27
28
29
30
31
32
33
34
35
36
37
38
39
40
  /*
   * When non-zero, a detected soft lockup or hung task ends in panic()
   * (and a reboot, if panic_timeout= is set). The initial value comes
   * from CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE.
   */
  unsigned int __read_mostly softlockup_panic =
  				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  /*
   * Handler for the "softlockup_panic=" boot parameter: parse the
   * number in @str (base auto-detected) into softlockup_panic.
   * Always returns 1.
   */
  static int __init softlockup_panic_setup(char *str)
  {
  	unsigned long requested = simple_strtoul(str, NULL, 0);
  
  	softlockup_panic = requested;
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
41
42
  static int
  softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
8446f1d39   Ingo Molnar   [PATCH] detect so...
43
44
45
46
47
48
49
50
51
  {
  	did_panic = 1;
  
  	return NOTIFY_DONE;
  }
  
  /* Notifier block hooking softlock_panic() into the panic notifier list. */
  static struct notifier_block panic_block = {
  	.notifier_call = softlock_panic,
  };
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
52
53
54
55
56
  /*
   * Return the clock of @this_cpu in approximate seconds.
   *
   * Nanosecond resolution is not needed here, so instead of a costly
   * divide by 10^9 the value is shifted right by 30 bits:
   * 2^30ns == 1.074s.
   */
  static unsigned long get_timestamp(int this_cpu)
  {
  	unsigned long long nsecs = cpu_clock(this_cpu);
  
  	return nsecs >> 30;  /* 2^30 ~= 10^9 */
  }
8c2238eaa   Jason Wessel   softlockup: fix N...
61
  static void __touch_softlockup_watchdog(void)
8446f1d39   Ingo Molnar   [PATCH] detect so...
62
  {
a3b13c23f   Ingo Molnar   softlockup: use c...
63
64
65
  	int this_cpu = raw_smp_processor_id();
  
  	__raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
8446f1d39   Ingo Molnar   [PATCH] detect so...
66
  }
8c2238eaa   Jason Wessel   softlockup: fix N...
67
68
69
70
71
  
  /*
   * Reset the current CPU's touch timestamp to 0. softlockup_tick()
   * treats 0 as "take a fresh timestamp" and returns without reporting,
   * so the lockup clock for this CPU restarts at the next tick.
   */
  void touch_softlockup_watchdog(void)
  {
  	__raw_get_cpu_var(touch_timestamp) = 0;
  }
8446f1d39   Ingo Molnar   [PATCH] detect so...
72
  EXPORT_SYMBOL(touch_softlockup_watchdog);
04c9167f9   Jeremy Fitzhardinge   add touch_all_sof...
73
74
75
76
77
78
79
80
81
  void touch_all_softlockup_watchdogs(void)
  {
  	int cpu;
  
  	/* Cause each CPU to re-update its timestamp rather than complain */
  	for_each_online_cpu(cpu)
  		per_cpu(touch_timestamp, cpu) = 0;
  }
  EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
8446f1d39   Ingo Molnar   [PATCH] detect so...
82
83
84
85
  /*
   * This callback runs from the timer interrupt, and checks
   * whether the watchdog thread has hung or not:
   */
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
86
  void softlockup_tick(void)
8446f1d39   Ingo Molnar   [PATCH] detect so...
87
88
  {
  	int this_cpu = smp_processor_id();
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
89
  	unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
90
  	unsigned long print_timestamp;
43581a100   Ingo Molnar   softlockup: impro...
91
  	struct pt_regs *regs = get_irq_regs();
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
92
  	unsigned long now;
8446f1d39   Ingo Molnar   [PATCH] detect so...
93

9383d9679   Dimitri Sivanich   softlockup: fix s...
94
95
96
97
98
99
100
  	/* Is detection switched off? */
  	if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
  		/* Be sure we don't false trigger if switched back on */
  		if (touch_timestamp)
  			per_cpu(touch_timestamp, this_cpu) = 0;
  		return;
  	}
04c9167f9   Jeremy Fitzhardinge   add touch_all_sof...
101
  	if (touch_timestamp == 0) {
8c2238eaa   Jason Wessel   softlockup: fix N...
102
  		__touch_softlockup_watchdog();
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
103
  		return;
04c9167f9   Jeremy Fitzhardinge   add touch_all_sof...
104
  	}
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
105
106
107
108
  
  	print_timestamp = per_cpu(print_timestamp, this_cpu);
  
  	/* report at most once a second */
3e2f69fdd   Johannes Weiner   softlockup: fix w...
109
  	if (print_timestamp == touch_timestamp || did_panic)
8446f1d39   Ingo Molnar   [PATCH] detect so...
110
  		return;
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
111
112
  	/* do not print during early bootup: */
  	if (unlikely(system_state != SYSTEM_RUNNING)) {
8c2238eaa   Jason Wessel   softlockup: fix N...
113
  		__touch_softlockup_watchdog();
8446f1d39   Ingo Molnar   [PATCH] detect so...
114
  		return;
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
115
  	}
8446f1d39   Ingo Molnar   [PATCH] detect so...
116

a3b13c23f   Ingo Molnar   softlockup: use c...
117
  	now = get_timestamp(this_cpu);
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
118

dd7a1e561   Johannes Weiner   softlockup: fix w...
119
120
121
122
123
  	/*
  	 * Wake up the high-prio watchdog task twice per
  	 * threshold timespan.
  	 */
  	if (now > touch_timestamp + softlockup_thresh/2)
ed50d6cbc   Peter Zijlstra   debug: softlockup...
124
  		wake_up_process(per_cpu(watchdog_task, this_cpu));
82a1fcb90   Ingo Molnar   softlockup: autom...
125
  	/* Warn about unreasonable delays: */
c4f3b63fe   Ravikiran G Thirumalai   softlockup: add a...
126
  	if (now <= (touch_timestamp + softlockup_thresh))
43581a100   Ingo Molnar   softlockup: impro...
127
  		return;
8446f1d39   Ingo Molnar   [PATCH] detect so...
128

43581a100   Ingo Molnar   softlockup: impro...
129
130
131
  	per_cpu(print_timestamp, this_cpu) = touch_timestamp;
  
  	spin_lock(&print_lock);
c4f3b63fe   Ravikiran G Thirumalai   softlockup: add a...
132
133
134
  	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]
  ",
  			this_cpu, now - touch_timestamp,
ba25f9dcc   Pavel Emelyanov   Use helpers to ob...
135
  			current->comm, task_pid_nr(current));
688c91755   Arjan van de Ven   softlockup: print...
136
  	print_modules();
8d5be7f4e   Vegard Nossum   softlockup: show ...
137
  	print_irqtrace_events(current);
43581a100   Ingo Molnar   softlockup: impro...
138
139
140
  	if (regs)
  		show_regs(regs);
  	else
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
141
  		dump_stack();
43581a100   Ingo Molnar   softlockup: impro...
142
  	spin_unlock(&print_lock);
9c44bc03f   Ingo Molnar   softlockup: allow...
143
144
145
  
  	if (softlockup_panic)
  		panic("softlockup: hung tasks");
8446f1d39   Ingo Molnar   [PATCH] detect so...
146
147
148
  }
  
  /*
82a1fcb90   Ingo Molnar   softlockup: autom...
149
150
   * Have a reasonable limit on the number of tasks checked:
   */
90739081e   Ingo Molnar   softlockup: fix s...
151
  unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
82a1fcb90   Ingo Molnar   softlockup: autom...
152
153
154
155
  
  /*
   * How long (in seconds) a task may stay blocked before it is reported.
   * Zero means infinite timeout - no checking done:
   */
fb822db46   Ingo Molnar   softlockup: incre...
156
  unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
82a1fcb90   Ingo Molnar   softlockup: autom...
157

90739081e   Ingo Molnar   softlockup: fix s...
158
  /*
   * Remaining number of hung-task reports: check_hung_task() decrements
   * it for every report and stays silent once it has reached zero.
   */
  unsigned long __read_mostly sysctl_hung_task_warnings = 10;
82a1fcb90   Ingo Molnar   softlockup: autom...
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  
  /*
   * Only do the hung-tasks check on one CPU: the watchdog thread whose
   * CPU number equals check_cpu runs it. Starts at -1 and is updated by
   * the CPU hotplug callback.
   */
  static int check_cpu __read_mostly = -1;
  
  static void check_hung_task(struct task_struct *t, unsigned long now)
  {
  	unsigned long switch_count = t->nvcsw + t->nivcsw;
  
  	if (t->flags & PF_FROZEN)
  		return;
  
  	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
  		t->last_switch_count = switch_count;
  		t->last_switch_timestamp = now;
  		return;
  	}
  	if ((long)(now - t->last_switch_timestamp) <
  					sysctl_hung_task_timeout_secs)
  		return;
201955463   Roel Kluin   check_hung_task()...
180
  	if (!sysctl_hung_task_warnings)
82a1fcb90   Ingo Molnar   softlockup: autom...
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
  		return;
  	sysctl_hung_task_warnings--;
  
  	/*
  	 * Ok, the task did not get scheduled for more than 2 minutes,
  	 * complain:
  	 */
  	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
  			"%ld seconds.
  ", t->comm, t->pid,
  			sysctl_hung_task_timeout_secs);
  	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
  			" disables this message.
  ");
  	sched_show_task(t);
  	__debug_show_held_locks(t);
  
  	t->last_switch_timestamp = now;
  	touch_nmi_watchdog();
9c44bc03f   Ingo Molnar   softlockup: allow...
200
201
202
  
  	if (softlockup_panic)
  		panic("softlockup: blocked tasks");
82a1fcb90   Ingo Molnar   softlockup: autom...
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  }
  
  /*
   * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
   * a really long time (120 seconds). If that happens, print out
   * a warning.
   */
  static void check_hung_uninterruptible_tasks(int this_cpu)
  {
  	int max_count = sysctl_hung_task_check_count;
  	unsigned long now = get_timestamp(this_cpu);
  	struct task_struct *g, *t;
  
  	/*
  	 * If the system crashed already then all bets are off,
  	 * do not report extra hung tasks:
  	 */
25ddbb18a   Andi Kleen   Make the taint fl...
220
  	if (test_taint(TAINT_DIE) || did_panic)
82a1fcb90   Ingo Molnar   softlockup: autom...
221
222
223
224
225
  		return;
  
  	read_lock(&tasklist_lock);
  	do_each_thread(g, t) {
  		if (!--max_count)
ed50d6cbc   Peter Zijlstra   debug: softlockup...
226
  			goto unlock;
cbaed698f   Oleg Nesterov   softlockup: minor...
227
228
  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
  		if (t->state == TASK_UNINTERRUPTIBLE)
82a1fcb90   Ingo Molnar   softlockup: autom...
229
230
  			check_hung_task(t, now);
  	} while_each_thread(g, t);
ed50d6cbc   Peter Zijlstra   debug: softlockup...
231
   unlock:
82a1fcb90   Ingo Molnar   softlockup: autom...
232
233
234
235
  	read_unlock(&tasklist_lock);
  }
  
  /*
8446f1d39   Ingo Molnar   [PATCH] detect so...
236
237
   * The watchdog thread - runs every second and touches the timestamp.
   */
a5f2ce3c6   Ingo Molnar   softlockup watchd...
238
  static int watchdog(void *__bind_cpu)
8446f1d39   Ingo Molnar   [PATCH] detect so...
239
  {
02fb6149f   Oleg Nesterov   softlockup: s/99/...
240
  	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
82a1fcb90   Ingo Molnar   softlockup: autom...
241
  	int this_cpu = (long)__bind_cpu;
8446f1d39   Ingo Molnar   [PATCH] detect so...
242
243
  
  	sched_setscheduler(current, SCHED_FIFO, &param);
8446f1d39   Ingo Molnar   [PATCH] detect so...
244

966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
245
  	/* initialize timestamp */
8c2238eaa   Jason Wessel   softlockup: fix N...
246
  	__touch_softlockup_watchdog();
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
247

7be2a03e3   Dmitry Adamushko   softlockup: fix t...
248
  	set_current_state(TASK_INTERRUPTIBLE);
8446f1d39   Ingo Molnar   [PATCH] detect so...
249
  	/*
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
250
  	 * Run briefly once per second to reset the softlockup timestamp.
82a1fcb90   Ingo Molnar   softlockup: autom...
251
  	 * If this gets delayed for more than 60 seconds then the
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
252
  	 * debug-printout triggers in softlockup_tick().
8446f1d39   Ingo Molnar   [PATCH] detect so...
253
254
  	 */
  	while (!kthread_should_stop()) {
8c2238eaa   Jason Wessel   softlockup: fix N...
255
  		__touch_softlockup_watchdog();
ed50d6cbc   Peter Zijlstra   debug: softlockup...
256
257
258
259
  		schedule();
  
  		if (kthread_should_stop())
  			break;
82a1fcb90   Ingo Molnar   softlockup: autom...
260

7be2a03e3   Dmitry Adamushko   softlockup: fix t...
261
262
263
264
  		if (this_cpu == check_cpu) {
  			if (sysctl_hung_task_timeout_secs)
  				check_hung_uninterruptible_tasks(this_cpu);
  		}
ed50d6cbc   Peter Zijlstra   debug: softlockup...
265

7be2a03e3   Dmitry Adamushko   softlockup: fix t...
266
  		set_current_state(TASK_INTERRUPTIBLE);
8446f1d39   Ingo Molnar   [PATCH] detect so...
267
  	}
7be2a03e3   Dmitry Adamushko   softlockup: fix t...
268
  	__set_current_state(TASK_RUNNING);
8446f1d39   Ingo Molnar   [PATCH] detect so...
269
270
271
272
273
274
275
  
  	return 0;
  }
  
  /*
   * Create/destroy watchdog threads as CPUs come and go:
   */
8c78f3075   Chandra Seetharaman   [PATCH] cpu hotpl...
276
  static int __cpuinit
8446f1d39   Ingo Molnar   [PATCH] detect so...
277
278
279
280
281
282
283
  cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  {
  	int hotcpu = (unsigned long)hcpu;
  	struct task_struct *p;
  
  	switch (action) {
  	case CPU_UP_PREPARE:
8bb784428   Rafael J. Wysocki   Add suspend-relat...
284
  	case CPU_UP_PREPARE_FROZEN:
8446f1d39   Ingo Molnar   [PATCH] detect so...
285
286
287
  		BUG_ON(per_cpu(watchdog_task, hotcpu));
  		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
  		if (IS_ERR(p)) {
a5f2ce3c6   Ingo Molnar   softlockup watchd...
288
289
  			printk(KERN_ERR "watchdog for %i failed
  ", hotcpu);
8446f1d39   Ingo Molnar   [PATCH] detect so...
290
291
  			return NOTIFY_BAD;
  		}
a5f2ce3c6   Ingo Molnar   softlockup watchd...
292
293
  		per_cpu(touch_timestamp, hotcpu) = 0;
  		per_cpu(watchdog_task, hotcpu) = p;
8446f1d39   Ingo Molnar   [PATCH] detect so...
294
  		kthread_bind(p, hotcpu);
a5f2ce3c6   Ingo Molnar   softlockup watchd...
295
  		break;
8446f1d39   Ingo Molnar   [PATCH] detect so...
296
  	case CPU_ONLINE:
8bb784428   Rafael J. Wysocki   Add suspend-relat...
297
  	case CPU_ONLINE_FROZEN:
82a1fcb90   Ingo Molnar   softlockup: autom...
298
  		check_cpu = any_online_cpu(cpu_online_map);
8446f1d39   Ingo Molnar   [PATCH] detect so...
299
300
301
  		wake_up_process(per_cpu(watchdog_task, hotcpu));
  		break;
  #ifdef CONFIG_HOTPLUG_CPU
82a1fcb90   Ingo Molnar   softlockup: autom...
302
303
304
305
306
307
308
309
310
  	case CPU_DOWN_PREPARE:
  	case CPU_DOWN_PREPARE_FROZEN:
  		if (hotcpu == check_cpu) {
  			cpumask_t temp_cpu_online_map = cpu_online_map;
  
  			cpu_clear(hotcpu, temp_cpu_online_map);
  			check_cpu = any_online_cpu(temp_cpu_online_map);
  		}
  		break;
ed50d6cbc   Peter Zijlstra   debug: softlockup...
311
312
313
314
315
316
317
318
  
  	case CPU_UP_CANCELED:
  	case CPU_UP_CANCELED_FROZEN:
  		if (!per_cpu(watchdog_task, hotcpu))
  			break;
  		/* Unbind so it can run.  Fall thru. */
  		kthread_bind(per_cpu(watchdog_task, hotcpu),
  			     any_online_cpu(cpu_online_map));
8446f1d39   Ingo Molnar   [PATCH] detect so...
319
  	case CPU_DEAD:
8bb784428   Rafael J. Wysocki   Add suspend-relat...
320
  	case CPU_DEAD_FROZEN:
8446f1d39   Ingo Molnar   [PATCH] detect so...
321
322
323
324
325
  		p = per_cpu(watchdog_task, hotcpu);
  		per_cpu(watchdog_task, hotcpu) = NULL;
  		kthread_stop(p);
  		break;
  #endif /* CONFIG_HOTPLUG_CPU */
a5f2ce3c6   Ingo Molnar   softlockup watchd...
326
  	}
8446f1d39   Ingo Molnar   [PATCH] detect so...
327
328
  	return NOTIFY_OK;
  }
8c78f3075   Chandra Seetharaman   [PATCH] cpu hotpl...
329
  /* Notifier block that routes CPU hotplug events to cpu_callback(). */
  static struct notifier_block __cpuinitdata cpu_nfb = {
  	.notifier_call = cpu_callback,
  };
7babe8db9   Eduard - Gabriel Munteanu   Full conversion t...
332
333
334
335
336
337
338
339
340
341
  /* Non-zero when "nosoftlockup" was given; spawn_softlockup_task() then does nothing. */
  static int __initdata nosoftlockup;
  
  /*
   * Handler for the "nosoftlockup" boot parameter: record the request.
   * @str is not examined. Always returns 1.
   */
  static int __init nosoftlockup_setup(char *str)
  {
  	nosoftlockup = 1;
  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);
  
  /*
   * Early initcall: unless "nosoftlockup" was given, create and wake the
   * watchdog thread for the CPU running this code by replaying the
   * CPU_UP_PREPARE and CPU_ONLINE events by hand, then register the CPU
   * hotplug notifier and the panic notifier. Failure to create the
   * thread is a BUG().
   */
  static int __init spawn_softlockup_task(void)
  {
  	void *self_cpu = (void *)(long)smp_processor_id();
  	int ret;
  
  	if (nosoftlockup)
  		return 0;
  
  	ret = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, self_cpu);
  	if (ret == NOTIFY_BAD) {
  		BUG();
  		return 1;
  	}
  
  	cpu_callback(&cpu_nfb, CPU_ONLINE, self_cpu);
  	register_cpu_notifier(&cpu_nfb);
  	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
  
  	return 0;
  }
7babe8db9   Eduard - Gabriel Munteanu   Full conversion t...
360
  early_initcall(spawn_softlockup_task);