Blame view

kernel/softlockup.c 7.7 KB
8446f1d39   Ingo Molnar   [PATCH] detect so...
1
2
3
/*
 * Detect Soft Lockups
 *
 * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
 *
 * this code detects soft lockups: incidents where a CPU
 * does not reschedule the kernel for 10 seconds or more.
 */
8446f1d39   Ingo Molnar   [PATCH] detect so...
9
10
  #include <linux/mm.h>
  #include <linux/cpu.h>
82a1fcb90   Ingo Molnar   softlockup: autom...
11
  #include <linux/nmi.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
12
13
  #include <linux/init.h>
  #include <linux/delay.h>
831441862   Rafael J. Wysocki   Freezer: make ker...
14
  #include <linux/freezer.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
15
16
17
  #include <linux/kthread.h>
  #include <linux/notifier.h>
  #include <linux/module.h>
43581a100   Ingo Molnar   softlockup: impro...
18
  #include <asm/irq_regs.h>
8446f1d39   Ingo Molnar   [PATCH] detect so...
19
/* Serializes the lockup report so output from several CPUs cannot interleave. */
static DEFINE_SPINLOCK(print_lock);

/* Per-CPU: last time (seconds) the watchdog thread ran on this CPU. */
static DEFINE_PER_CPU(unsigned long, touch_timestamp);
/* Per-CPU: touch_timestamp value at the time of the last printed report. */
static DEFINE_PER_CPU(unsigned long, print_timestamp);
/* Per-CPU: the "watchdog/N" kthread for this CPU (NULL until created). */
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);

/* Set by the panic notifier; suppresses further reports after a panic. */
static int __read_mostly did_panic;
/* Seconds of no-reschedule before softlockup_tick() complains.
 * Non-static: presumably exposed as a sysctl — confirm in kernel/sysctl.c. */
unsigned long __read_mostly softlockup_thresh = 60;
6687a97d4   Ingo Molnar   [PATCH] timer-irq...
25
26
27
  
/*
 * Panic-notifier callback: record that the system panicked so the
 * softlockup and hung-task checks stop reporting (all bets are off).
 * Always returns NOTIFY_DONE; the event and pointer are ignored.
 */
static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

/* Registered on panic_notifier_list in spawn_softlockup_task(). */
static struct notifier_block panic_block = {
	.notifier_call = softlock_panic,
};
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
37
38
39
40
41
  /*
   * Returns seconds, approximately.  We don't need nanosecond
   * resolution, and we don't need to waste time with a big divide when
   * 2^30ns == 1.074s.
   */
a3b13c23f   Ingo Molnar   softlockup: use c...
42
  static unsigned long get_timestamp(int this_cpu)
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
43
  {
82a1fcb90   Ingo Molnar   softlockup: autom...
44
  	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
966812dc9   Jeremy Fitzhardinge   Ignore stolen tim...
45
  }
8446f1d39   Ingo Molnar   [PATCH] detect so...
46
47
/*
 * Reset this CPU's softlockup timestamp to "now", telling
 * softlockup_tick() that the CPU is making progress.  Exported so
 * long-running kernel code can call it from any context; the __raw
 * per-cpu access deliberately skips the preemption check since a
 * stale write to a neighbouring CPU's slot is harmless here.
 */
void touch_softlockup_watchdog(void)
{
	int this_cpu = raw_smp_processor_id();

	__raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
04c9167f9   Jeremy Fitzhardinge   add touch_all_sof...
53
54
55
56
57
58
59
60
61
/*
 * Reset the softlockup timestamp on every online CPU.  Writing 0 makes
 * each CPU's softlockup_tick() re-initialize its own timestamp (see the
 * touch_timestamp == 0 branch there) instead of warning spuriously,
 * e.g. after a long global stall such as resume or a SysRq dump.
 */
void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/* Cause each CPU to re-update its timestamp rather than complain */
	for_each_online_cpu(cpu)
		per_cpu(touch_timestamp, cpu) = 0;
}
EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
8446f1d39   Ingo Molnar   [PATCH] detect so...
62
63
64
65
/*
 * This callback runs from the timer interrupt, and checks
 * whether the watchdog thread has hung or not:
 *
 * - a zero touch_timestamp means "re-arm quietly" (set by
 *   touch_all_softlockup_watchdogs() or on CPU_UP_PREPARE);
 * - the per-CPU watchdog kthread is kicked once per second;
 * - if the timestamp is older than softlockup_thresh seconds,
 *   print a one-shot "soft lockup" report with a backtrace.
 */
void softlockup_tick(void)
{
	int this_cpu = smp_processor_id();
	unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
	unsigned long print_timestamp;
	struct pt_regs *regs = get_irq_regs();
	unsigned long now;

	/* Timestamp was cleared: silently re-initialize instead of warning. */
	if (touch_timestamp == 0) {
		touch_softlockup_watchdog();
		return;
	}

	print_timestamp = per_cpu(print_timestamp, this_cpu);

	/* report at most once a second */
	if ((print_timestamp >= touch_timestamp &&
			print_timestamp < (touch_timestamp + 1)) ||
			did_panic || !per_cpu(watchdog_task, this_cpu)) {
		return;
	}

	/* do not print during early bootup: */
	if (unlikely(system_state != SYSTEM_RUNNING)) {
		touch_softlockup_watchdog();
		return;
	}

	now = get_timestamp(this_cpu);

	/* Wake up the high-prio watchdog task every second: */
	if (now > (touch_timestamp + 1))
		wake_up_process(per_cpu(watchdog_task, this_cpu));

	/* Warn about unreasonable delays: */
	if (now <= (touch_timestamp + softlockup_thresh))
		return;

	/* Remember what we reported on, to rate-limit (see check above). */
	per_cpu(print_timestamp, this_cpu) = touch_timestamp;

	/* print_lock keeps reports from multiple CPUs from interleaving. */
	spin_lock(&print_lock);
	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
			this_cpu, now - touch_timestamp,
			current->comm, task_pid_nr(current));
	/* Prefer the interrupted registers; fall back to a raw stack dump. */
	if (regs)
		show_regs(regs);
	else
		dump_stack();
	spin_unlock(&print_lock);
}
  
/*
 * Have a reasonable limit on the number of tasks checked:
 * (bounds one pass of check_hung_uninterruptible_tasks())
 */
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;

/* Remaining hung-task warnings to print before going quiet. */
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
82a1fcb90   Ingo Molnar   softlockup: autom...
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
  
  /*
   * Only do the hung-tasks check on one CPU:
   */
  static int check_cpu __read_mostly = -1;
  
  static void check_hung_task(struct task_struct *t, unsigned long now)
  {
  	unsigned long switch_count = t->nvcsw + t->nivcsw;
  
  	if (t->flags & PF_FROZEN)
  		return;
  
  	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
  		t->last_switch_count = switch_count;
  		t->last_switch_timestamp = now;
  		return;
  	}
  	if ((long)(now - t->last_switch_timestamp) <
  					sysctl_hung_task_timeout_secs)
  		return;
  	if (sysctl_hung_task_warnings < 0)
  		return;
  	sysctl_hung_task_warnings--;
  
  	/*
  	 * Ok, the task did not get scheduled for more than 2 minutes,
  	 * complain:
  	 */
  	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
  			"%ld seconds.
  ", t->comm, t->pid,
  			sysctl_hung_task_timeout_secs);
  	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
  			" disables this message.
  ");
  	sched_show_task(t);
  	__debug_show_held_locks(t);
  
  	t->last_switch_timestamp = now;
  	touch_nmi_watchdog();
  }
  
/*
 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
 * a really long time (120 seconds). If that happens, print out
 * a warning.
 *
 * Walks every thread under tasklist_lock, bounded by
 * sysctl_hung_task_check_count to keep the critical section short.
 */
static void check_hung_uninterruptible_tasks(int this_cpu)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long now = get_timestamp(this_cpu);
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if ((tainted & TAINT_DIE) || did_panic)
		return;

	read_lock(&tasklist_lock);
	do_each_thread(g, t) {
		/* Bail out once the per-pass budget is exhausted. */
		if (!--max_count)
			goto unlock;
		/* Only D-state (uninterruptible) tasks are candidates. */
		if (t->state & TASK_UNINTERRUPTIBLE)
			check_hung_task(t, now);
	} while_each_thread(g, t);
 unlock:
	read_unlock(&tasklist_lock);
}
  
/*
 * The watchdog thread - runs every second and touches the timestamp.
 *
 * One instance runs per CPU at the highest FIFO realtime priority, so
 * merely getting scheduled proves the CPU is not soft-locked.  It is
 * woken once a second by softlockup_tick(); the CPU elected as
 * check_cpu additionally scans for hung uninterruptible tasks.
 *
 * @__bind_cpu: CPU number this thread is bound to (passed as void *).
 * Returns 0 when stopped via kthread_stop().
 */
static int watchdog(void *__bind_cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	int this_cpu = (long)__bind_cpu;

	/* Highest RT priority: if we can't run, the CPU is truly stuck. */
	sched_setscheduler(current, SCHED_FIFO, &param);

	/* initialize timestamp */
	touch_softlockup_watchdog();

	/* Set state before the stop-check to avoid missing a wakeup. */
	set_current_state(TASK_INTERRUPTIBLE);
	/*
	 * Run briefly once per second to reset the softlockup timestamp.
	 * If this gets delayed for more than 60 seconds then the
	 * debug-printout triggers in softlockup_tick().
	 */
	while (!kthread_should_stop()) {
		touch_softlockup_watchdog();
		schedule();

		if (kthread_should_stop())
			break;

		/* Only the elected CPU performs the hung-task scan. */
		if (this_cpu == check_cpu) {
			if (sysctl_hung_task_timeout_secs)
				check_hung_uninterruptible_tasks(this_cpu);
		}

		set_current_state(TASK_INTERRUPTIBLE);
	}
	/* Undo the sleep state set at the top of the last loop iteration. */
	__set_current_state(TASK_RUNNING);

	return 0;
}
  
/*
 * Create/destroy watchdog threads as CPUs come and go:
 *
 * CPU hotplug notifier.  UP_PREPARE creates (but does not start) the
 * per-CPU watchdog kthread; ONLINE wakes it and re-elects check_cpu;
 * DOWN_PREPARE migrates the check_cpu election away from the dying
 * CPU; UP_CANCELED/DEAD stop and reap the kthread.
 * Returns NOTIFY_OK, or NOTIFY_BAD if thread creation failed.
 */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		BUG_ON(per_cpu(watchdog_task, hotcpu));
		p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
		if (IS_ERR(p)) {
			printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		/* Zero timestamp: softlockup_tick() re-arms quietly. */
		per_cpu(touch_timestamp, hotcpu) = 0;
		per_cpu(watchdog_task, hotcpu) = p;
		kthread_bind(p, hotcpu);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		/* (Re-)elect a CPU to run the hung-task scan. */
		check_cpu = any_online_cpu(cpu_online_map);
		wake_up_process(per_cpu(watchdog_task, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/* The elected CPU is going away: pick another one. */
		if (hotcpu == check_cpu) {
			cpumask_t temp_cpu_online_map = cpu_online_map;

			cpu_clear(hotcpu, temp_cpu_online_map);
			check_cpu = any_online_cpu(temp_cpu_online_map);
		}
		break;

	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(watchdog_task, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(watchdog_task, hotcpu),
			     any_online_cpu(cpu_online_map));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Clear the slot before stopping so the tick ignores it. */
		p = per_cpu(watchdog_task, hotcpu);
		per_cpu(watchdog_task, hotcpu) = NULL;
		kthread_stop(p);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}
8c78f3075   Chandra Seetharaman   [PATCH] cpu hotpl...
293
/* Hotplug notifier; registered in spawn_softlockup_task(). */
static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};
  
/*
 * Boot-time init: bring up the watchdog for the boot CPU by replaying
 * the UP_PREPARE/ONLINE notifier events by hand, then register the
 * hotplug and panic notifiers for everything that follows.
 */
__init void spawn_softlockup_task(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	/* Thread creation for the boot CPU must not fail. */
	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
}