Blame view

kernel/watchdog.c 14.4 KB
58687acba   Don Zickus   lockup_detector: ...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
  /*
   * Detect hard and soft lockups on a system
   *
   * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
   *
   * This code detects hard lockups: incidents where, on a CPU,
   * the kernel does not respond to anything except NMI.
   *
   * Note: Most of this code is borrowed heavily from softlockup.c,
   * so thanks to Ingo for the initial implementation.
   * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
   * to those contributors as well.
   */
  
  #include <linux/mm.h>
  #include <linux/cpu.h>
  #include <linux/nmi.h>
  #include <linux/init.h>
  #include <linux/delay.h>
  #include <linux/freezer.h>
  #include <linux/kthread.h>
  #include <linux/lockdep.h>
  #include <linux/notifier.h>
  #include <linux/module.h>
  #include <linux/sysctl.h>
  
  #include <asm/irq_regs.h>
  #include <linux/perf_event.h>
4135038a5   Marcin Slusarz   watchdog: Fix bro...
29
  int watchdog_enabled = 1;
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
30
  int __read_mostly watchdog_thresh = 10;
58687acba   Don Zickus   lockup_detector: ...
31
32
33
34
35
  
  /*
   * Per-CPU soft lockup state.
   *
   * watchdog_touch_ts: last "touch" time as returned by get_timestamp();
   * a value of 0 tells watchdog_timer_fn() to re-stamp it instead of
   * checking for a lockup.
   */
  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
  /* the "watchdog/N" kthread created for this CPU by watchdog_enable() */
  static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
  /* timer whose callback is watchdog_timer_fn() (see watchdog_prepare_cpu()) */
  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
  /*
   * set by touch_softlockup_watchdog_sync(); makes watchdog_timer_fn() call
   * sched_clock_tick() before re-stamping watchdog_touch_ts
   */
  static DEFINE_PER_CPU(bool, softlockup_touch_sync);
58687acba   Don Zickus   lockup_detector: ...
36
  static DEFINE_PER_CPU(bool, soft_watchdog_warn);
23637d477   Frederic Weisbecker   lockup_detector: ...
37
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
cafcd80d2   Don Zickus   lockup_detector: ...
38
39
  /* true once a hard lockup has been reported on this CPU (report only once) */
  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  /*
   * set by touch_nmi_watchdog(); makes the next watchdog_overflow_callback()
   * on this CPU clear the flag and skip the hard lockup check
   */
  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
58687acba   Don Zickus   lockup_detector: ...
40
41
42
43
  /* incremented by watchdog_interrupt_count() on every watchdog_timer_fn() run */
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
  /* value of hrtimer_interrupts recorded by the previous is_hardlockup() call */
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
  /* perf event created by watchdog_nmi_enable(); NULL when none is set up */
  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  #endif
58687acba   Don Zickus   lockup_detector: ...
44
45
46
47
  /* boot commands */
  /*
   * Should we panic when a soft-lockup or hard-lockup occurs:
   */
23637d477   Frederic Weisbecker   lockup_detector: ...
48
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
fef2c9bc1   Don Zickus   kernel/watchdog.c...
49
50
  static int hardlockup_panic =
  			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
58687acba   Don Zickus   lockup_detector: ...
51
52
53
54
55
  
  /*
   * Handler for the "nmi_watchdog=" boot parameter.
   *
   * The value is matched by prefix:
   *   "panic"   - panic when a hard lockup is detected
   *   "nopanic" - only warn when a hard lockup is detected
   *   "0"       - clear watchdog_enabled
   * Anything else is ignored.  Always returns 1.
   */
  static int __init hardlockup_panic_setup(char *str)
  {
  	if (strncmp(str, "panic", 5) == 0) {
  		hardlockup_panic = 1;
  	} else if (strncmp(str, "nopanic", 7) == 0) {
  		hardlockup_panic = 0;
  	} else if (strncmp(str, "0", 1) == 0) {
  		watchdog_enabled = 0;
  	}

  	return 1;
  }
  __setup("nmi_watchdog=", hardlockup_panic_setup);
  #endif
  
  unsigned int __read_mostly softlockup_panic =
  			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  /*
   * Handler for the "softlockup_panic=" boot parameter: the value is parsed
   * as a number (base auto-detected) and stored in softlockup_panic.
   * Always returns 1.
   */
  static int __init softlockup_panic_setup(char *str)
  {
  	unsigned long val = simple_strtoul(str, NULL, 0);

  	softlockup_panic = val;
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
  
  /*
   * Handler for the "nowatchdog" boot parameter: clears watchdog_enabled.
   * The parameter value is not looked at.  Always returns 1.
   */
  static int __init nowatchdog_setup(char *str)
  {
  	watchdog_enabled = 0;

  	return 1;
  }
  __setup("nowatchdog", nowatchdog_setup);
  
  /*
   * deprecated
   *
   * Handler for the "nosoftlockup" boot parameter.  Does the same as
   * "nowatchdog": clears watchdog_enabled and returns 1.
   */
  static int __init nosoftlockup_setup(char *str)
  {
  	watchdog_enabled = 0;

  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);
  /*  */
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
91
92
93
94
95
96
97
  /*
   * Hard-lockup warnings should be triggered after just a few seconds. Soft-
   * lockups can have false positives under extreme conditions. So we generally
   * want a higher threshold for soft lockups than for hard lockups. So we couple
   * the thresholds with a factor: we make the soft threshold twice the amount of
   * time the hard threshold is.
   */
6e9101aee   Ingo Molnar   watchdog: Fix non...
98
  static int get_softlockup_thresh(void)
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
99
100
101
  {
  	return watchdog_thresh * 2;
  }
58687acba   Don Zickus   lockup_detector: ...
102
103
104
105
106
107
108
109
110
111
112
113
114
115
  
  /*
   * Returns the cpu_clock() value of @this_cpu scaled down to roughly
   * seconds.  Nanosecond resolution is not needed here, so a shift by 30
   * replaces a 64-bit divide: 2^30 ns == 1.074 s.
   */
  static unsigned long get_timestamp(int this_cpu)
  {
  	unsigned long long now_ns = cpu_clock(this_cpu);

  	return now_ns >> 30LL;  /* 2^30 ~= 10^9 */
  }
  
  static unsigned long get_sample_period(void)
  {
  	/*
586692a5a   Mandeep Singh Baines   watchdog: Disable...
116
  	 * convert watchdog_thresh from seconds to ns
58687acba   Don Zickus   lockup_detector: ...
117
118
119
120
  	 * the divide by 5 is to give hrtimer 5 chances to
  	 * increment before the hardlockup detector generates
  	 * a warning
  	 */
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
121
  	return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
58687acba   Don Zickus   lockup_detector: ...
122
123
124
125
126
  }
  
  /* Commands for resetting the watchdog */
  static void __touch_watchdog(void)
  {
26e09c6ee   Don Zickus   lockup_detector: ...
127
  	int this_cpu = smp_processor_id();
58687acba   Don Zickus   lockup_detector: ...
128

909ea9646   Christoph Lameter   core: Replace __g...
129
  	__this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
58687acba   Don Zickus   lockup_detector: ...
130
  }
332fbdbca   Don Zickus   lockup_detector: ...
131
  void touch_softlockup_watchdog(void)
58687acba   Don Zickus   lockup_detector: ...
132
  {
909ea9646   Christoph Lameter   core: Replace __g...
133
  	__this_cpu_write(watchdog_touch_ts, 0);
58687acba   Don Zickus   lockup_detector: ...
134
  }
0167c7819   Ingo Molnar   watchdog: Export ...
135
  EXPORT_SYMBOL(touch_softlockup_watchdog);
58687acba   Don Zickus   lockup_detector: ...
136

332fbdbca   Don Zickus   lockup_detector: ...
137
  void touch_all_softlockup_watchdogs(void)
58687acba   Don Zickus   lockup_detector: ...
138
139
140
141
142
143
144
145
146
147
148
  {
  	int cpu;
  
  	/*
  	 * this is done lockless
  	 * do we care if a 0 races with a timestamp?
  	 * all it means is the softlock check starts one cycle later
  	 */
  	for_each_online_cpu(cpu)
  		per_cpu(watchdog_touch_ts, cpu) = 0;
  }
cafcd80d2   Don Zickus   lockup_detector: ...
149
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
150
151
  void touch_nmi_watchdog(void)
  {
68d3f1d81   Don Zickus   lockup_detector: ...
152
153
154
155
156
157
158
159
  	if (watchdog_enabled) {
  		unsigned cpu;
  
  		for_each_present_cpu(cpu) {
  			if (per_cpu(watchdog_nmi_touch, cpu) != true)
  				per_cpu(watchdog_nmi_touch, cpu) = true;
  		}
  	}
332fbdbca   Don Zickus   lockup_detector: ...
160
  	touch_softlockup_watchdog();
58687acba   Don Zickus   lockup_detector: ...
161
162
  }
  EXPORT_SYMBOL(touch_nmi_watchdog);
cafcd80d2   Don Zickus   lockup_detector: ...
163
  #endif
58687acba   Don Zickus   lockup_detector: ...
164
165
166
167
168
  /*
   * Like touch_softlockup_watchdog(), but additionally sets
   * softlockup_touch_sync so that watchdog_timer_fn() calls
   * sched_clock_tick() before it takes the new timestamp.
   */
  void touch_softlockup_watchdog_sync(void)
  {
  	/* set the sync request before zeroing the timestamp that triggers it */
  	__raw_get_cpu_var(softlockup_touch_sync) = true;
  	__raw_get_cpu_var(watchdog_touch_ts) = 0;
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
169
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
170
  /* watchdog detector functions */
26e09c6ee   Don Zickus   lockup_detector: ...
171
  static int is_hardlockup(void)
58687acba   Don Zickus   lockup_detector: ...
172
  {
909ea9646   Christoph Lameter   core: Replace __g...
173
  	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
58687acba   Don Zickus   lockup_detector: ...
174

909ea9646   Christoph Lameter   core: Replace __g...
175
  	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
58687acba   Don Zickus   lockup_detector: ...
176
  		return 1;
909ea9646   Christoph Lameter   core: Replace __g...
177
  	__this_cpu_write(hrtimer_interrupts_saved, hrint);
58687acba   Don Zickus   lockup_detector: ...
178
179
180
  	return 0;
  }
  #endif
26e09c6ee   Don Zickus   lockup_detector: ...
181
  /*
   * Compare the current timestamp of this CPU against @touch_ts.
   *
   * Returns 0 while the soft lockup threshold has not been exceeded,
   * otherwise the time elapsed since @touch_ts (in get_timestamp() units).
   */
  static int is_softlockup(unsigned long touch_ts)
  {
  	unsigned long now = get_timestamp(smp_processor_id());
  	unsigned long deadline = touch_ts + get_softlockup_thresh();

  	/* Warn about unreasonable delays: */
  	if (!time_after(now, deadline))
  		return 0;

  	return now - touch_ts;
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
191
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
1880c4ae1   Cyrill Gorcunov   perf, x86: Add hw...
192

58687acba   Don Zickus   lockup_detector: ...
193
194
195
196
197
198
199
200
201
  /*
   * Attributes of the perf event used by the hard lockup detector: a
   * pinned hardware CPU-cycles counter that is created disabled.
   * watchdog_nmi_enable() fills in sample_period and enables the event.
   */
  static struct perf_event_attr wd_hw_attr = {
  	.size		= sizeof(wd_hw_attr),
  	.type		= PERF_TYPE_HARDWARE,
  	.config		= PERF_COUNT_HW_CPU_CYCLES,
  	.disabled	= 1,
  	.pinned		= 1,
  };
  
  /* Callback function for perf event subsystem */
a8b0ca17b   Peter Zijlstra   perf: Remove the ...
202
  static void watchdog_overflow_callback(struct perf_event *event,
58687acba   Don Zickus   lockup_detector: ...
203
204
205
  		 struct perf_sample_data *data,
  		 struct pt_regs *regs)
  {
c6db67cda   Peter Zijlstra   watchdog: Don't t...
206
207
  	/* Ensure the watchdog never gets throttled */
  	event->hw.interrupts = 0;
909ea9646   Christoph Lameter   core: Replace __g...
208
209
  	if (__this_cpu_read(watchdog_nmi_touch) == true) {
  		__this_cpu_write(watchdog_nmi_touch, false);
58687acba   Don Zickus   lockup_detector: ...
210
211
212
213
214
215
216
217
218
  		return;
  	}
  
  	/* check for a hardlockup
  	 * This is done by making sure our timer interrupt
  	 * is incrementing.  The timer interrupt should have
  	 * fired multiple times before we overflow'd.  If it hasn't
  	 * then this is a good indication the cpu is stuck
  	 */
26e09c6ee   Don Zickus   lockup_detector: ...
219
220
  	if (is_hardlockup()) {
  		int this_cpu = smp_processor_id();
58687acba   Don Zickus   lockup_detector: ...
221
  		/* only print hardlockups once */
909ea9646   Christoph Lameter   core: Replace __g...
222
  		if (__this_cpu_read(hard_watchdog_warn) == true)
58687acba   Don Zickus   lockup_detector: ...
223
224
225
226
227
228
  			return;
  
  		if (hardlockup_panic)
  			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  		else
  			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
909ea9646   Christoph Lameter   core: Replace __g...
229
  		__this_cpu_write(hard_watchdog_warn, true);
58687acba   Don Zickus   lockup_detector: ...
230
231
  		return;
  	}
909ea9646   Christoph Lameter   core: Replace __g...
232
  	__this_cpu_write(hard_watchdog_warn, false);
58687acba   Don Zickus   lockup_detector: ...
233
234
235
236
  	return;
  }
  static void watchdog_interrupt_count(void)
  {
909ea9646   Christoph Lameter   core: Replace __g...
237
  	__this_cpu_inc(hrtimer_interrupts);
58687acba   Don Zickus   lockup_detector: ...
238
239
240
  }
  #else
  /* no hard lockup detector configured: nothing to count */
  static inline void watchdog_interrupt_count(void)
  {
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
241
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
58687acba   Don Zickus   lockup_detector: ...
242
243
244
245
  
  /* watchdog kicker functions */
  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  {
909ea9646   Christoph Lameter   core: Replace __g...
246
  	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
58687acba   Don Zickus   lockup_detector: ...
247
248
249
250
251
252
253
  	struct pt_regs *regs = get_irq_regs();
  	int duration;
  
  	/* kick the hardlockup detector */
  	watchdog_interrupt_count();
  
  	/* kick the softlockup detector */
909ea9646   Christoph Lameter   core: Replace __g...
254
  	wake_up_process(__this_cpu_read(softlockup_watchdog));
58687acba   Don Zickus   lockup_detector: ...
255
256
257
258
259
  
  	/* .. and repeat */
  	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
  
  	if (touch_ts == 0) {
909ea9646   Christoph Lameter   core: Replace __g...
260
  		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
58687acba   Don Zickus   lockup_detector: ...
261
262
263
264
  			/*
  			 * If the time stamp was touched atomically
  			 * make sure the scheduler tick is up to date.
  			 */
909ea9646   Christoph Lameter   core: Replace __g...
265
  			__this_cpu_write(softlockup_touch_sync, false);
58687acba   Don Zickus   lockup_detector: ...
266
267
268
269
270
271
272
273
274
275
276
277
  			sched_clock_tick();
  		}
  		__touch_watchdog();
  		return HRTIMER_RESTART;
  	}
  
  	/* check for a softlockup
  	 * This is done by making sure a high priority task is
  	 * being scheduled.  The task touches the watchdog to
  	 * indicate it is getting cpu time.  If it hasn't then
  	 * this is a good indication some task is hogging the cpu
  	 */
26e09c6ee   Don Zickus   lockup_detector: ...
278
  	duration = is_softlockup(touch_ts);
58687acba   Don Zickus   lockup_detector: ...
279
280
  	if (unlikely(duration)) {
  		/* only warn once */
909ea9646   Christoph Lameter   core: Replace __g...
281
  		if (__this_cpu_read(soft_watchdog_warn) == true)
58687acba   Don Zickus   lockup_detector: ...
282
283
284
285
  			return HRTIMER_RESTART;
  
  		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]
  ",
26e09c6ee   Don Zickus   lockup_detector: ...
286
  			smp_processor_id(), duration,
58687acba   Don Zickus   lockup_detector: ...
287
288
289
290
291
292
293
294
295
296
  			current->comm, task_pid_nr(current));
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  
  		if (softlockup_panic)
  			panic("softlockup: hung tasks");
909ea9646   Christoph Lameter   core: Replace __g...
297
  		__this_cpu_write(soft_watchdog_warn, true);
58687acba   Don Zickus   lockup_detector: ...
298
  	} else
909ea9646   Christoph Lameter   core: Replace __g...
299
  		__this_cpu_write(soft_watchdog_warn, false);
58687acba   Don Zickus   lockup_detector: ...
300
301
302
303
304
305
306
307
  
  	return HRTIMER_RESTART;
  }
  
  
  /*
   * The watchdog thread - touches the timestamp.
   */
26e09c6ee   Don Zickus   lockup_detector: ...
308
  static int watchdog(void *unused)
58687acba   Don Zickus   lockup_detector: ...
309
  {
cba9bd22a   Thomas Gleixner   watchdog: Drop FI...
310
  	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
26e09c6ee   Don Zickus   lockup_detector: ...
311
  	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
58687acba   Don Zickus   lockup_detector: ...
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  
  	sched_setscheduler(current, SCHED_FIFO, &param);
  
  	/* initialize timestamp */
  	__touch_watchdog();
  
  	/* kick off the timer for the hardlockup detector */
  	/* done here because hrtimer_start can only pin to smp_processor_id() */
  	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
  		      HRTIMER_MODE_REL_PINNED);
  
  	set_current_state(TASK_INTERRUPTIBLE);
  	/*
  	 * Run briefly once per second to reset the softlockup timestamp.
  	 * If this gets delayed for more than 60 seconds then the
26e09c6ee   Don Zickus   lockup_detector: ...
327
  	 * debug-printout triggers in watchdog_timer_fn().
58687acba   Don Zickus   lockup_detector: ...
328
329
330
331
332
333
334
335
336
337
338
  	 */
  	while (!kthread_should_stop()) {
  		__touch_watchdog();
  		schedule();
  
  		if (kthread_should_stop())
  			break;
  
  		set_current_state(TASK_INTERRUPTIBLE);
  	}
  	__set_current_state(TASK_RUNNING);
cba9bd22a   Thomas Gleixner   watchdog: Drop FI...
339
340
  	param.sched_priority = 0;
  	sched_setscheduler(current, SCHED_NORMAL, &param);
58687acba   Don Zickus   lockup_detector: ...
341
342
  	return 0;
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
343
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
344
345
346
347
348
349
350
351
352
353
354
355
  static int watchdog_nmi_enable(int cpu)
  {
  	struct perf_event_attr *wd_attr;
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	/* is it already setup and enabled? */
  	if (event && event->state > PERF_EVENT_STATE_OFF)
  		goto out;
  
  	/* it is setup but not enabled */
  	if (event != NULL)
  		goto out_enable;
58687acba   Don Zickus   lockup_detector: ...
356
  	wd_attr = &wd_hw_attr;
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
357
  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
1880c4ae1   Cyrill Gorcunov   perf, x86: Add hw...
358
359
  
  	/* Try to register using hardware perf events */
4dc0da869   Avi Kivity   perf: Add context...
360
  	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
58687acba   Don Zickus   lockup_detector: ...
361
362
363
364
365
  	if (!IS_ERR(event)) {
  		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.
  ");
  		goto out_save;
  	}
5651f7f47   Don Zickus   watchdog, nmi: Lo...
366
367
368
369
370
371
372
373
374
375
376
  
  	/* vary the KERN level based on the returned errno */
  	if (PTR_ERR(event) == -EOPNOTSUPP)
  		printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)
  ", cpu);
  	else if (PTR_ERR(event) == -ENOENT)
  		printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled
  ", cpu);
  	else
  		printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld
  ", cpu, PTR_ERR(event));
eac243355   Akinobu Mita   lockup_detector: ...
377
  	return PTR_ERR(event);
58687acba   Don Zickus   lockup_detector: ...
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
  
  	/* success path */
  out_save:
  	per_cpu(watchdog_ev, cpu) = event;
  out_enable:
  	perf_event_enable(per_cpu(watchdog_ev, cpu));
  out:
  	return 0;
  }
  
  /*
   * Disable and release the hard lockup perf event of @cpu, if there is
   * one, and clear the per-CPU pointer to it.
   */
  static void watchdog_nmi_disable(int cpu)
  {
  	struct perf_event *ev = per_cpu(watchdog_ev, cpu);

  	if (!ev)
  		return;

  	perf_event_disable(ev);
  	per_cpu(watchdog_ev, cpu) = NULL;

  	/* should be in cleanup, but blocks oprofile */
  	perf_event_release_kernel(ev);
  }
  #else
  /* no hard lockup detector configured: enabling trivially succeeds */
  static int watchdog_nmi_enable(int cpu)
  {
  	return 0;
  }

  /* no hard lockup detector configured: nothing to disable */
  static void watchdog_nmi_disable(int cpu)
  {
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
404
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
58687acba   Don Zickus   lockup_detector: ...
405
406
  
  /* prepare/enable/disable routines */
6e9101aee   Ingo Molnar   watchdog: Fix non...
407
  static void watchdog_prepare_cpu(int cpu)
58687acba   Don Zickus   lockup_detector: ...
408
409
410
411
412
413
  {
  	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
  
  	WARN_ON(per_cpu(softlockup_watchdog, cpu));
  	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	hrtimer->function = watchdog_timer_fn;
58687acba   Don Zickus   lockup_detector: ...
414
415
416
417
418
  }
  
  static int watchdog_enable(int cpu)
  {
  	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
f99a99330   Don Zickus   kernel/watchdog.c...
419
  	int err = 0;
58687acba   Don Zickus   lockup_detector: ...
420
421
  
  	/* enable the perf event */
eac243355   Akinobu Mita   lockup_detector: ...
422
  	err = watchdog_nmi_enable(cpu);
f99a99330   Don Zickus   kernel/watchdog.c...
423
424
  
  	/* Regardless of err above, fall through and start softlockup */
58687acba   Don Zickus   lockup_detector: ...
425
426
427
  
  	/* create the watchdog thread */
  	if (!p) {
18e5a45db   Eric Dumazet   watchdog: Make th...
428
  		p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
58687acba   Don Zickus   lockup_detector: ...
429
430
431
  		if (IS_ERR(p)) {
  			printk(KERN_ERR "softlockup watchdog for %i failed
  ", cpu);
1409f141a   Hillf Danton   kernel/watchdog.c...
432
  			if (!err) {
f99a99330   Don Zickus   kernel/watchdog.c...
433
434
  				/* if hardlockup hasn't already set this */
  				err = PTR_ERR(p);
1409f141a   Hillf Danton   kernel/watchdog.c...
435
436
437
  				/* and disable the perf event */
  				watchdog_nmi_disable(cpu);
  			}
f99a99330   Don Zickus   kernel/watchdog.c...
438
  			goto out;
58687acba   Don Zickus   lockup_detector: ...
439
440
441
442
443
444
  		}
  		kthread_bind(p, cpu);
  		per_cpu(watchdog_touch_ts, cpu) = 0;
  		per_cpu(softlockup_watchdog, cpu) = p;
  		wake_up_process(p);
  	}
f99a99330   Don Zickus   kernel/watchdog.c...
445
446
  out:
  	return err;
58687acba   Don Zickus   lockup_detector: ...
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
  }
  
  /*
   * Tear down both detectors on @cpu: cancel the hrtimer, disable the
   * perf event and stop the watchdog thread if there is one.
   */
  static void watchdog_disable(int cpu)
  {
  	struct task_struct *thread = per_cpu(softlockup_watchdog, cpu);
  	struct hrtimer *timer = &per_cpu(watchdog_hrtimer, cpu);

  	/*
  	 * cancel the timer first to stop incrementing the stats
  	 * and waking up the kthread
  	 */
  	hrtimer_cancel(timer);

  	/* disable the perf event */
  	watchdog_nmi_disable(cpu);

  	/* stop the watchdog thread */
  	if (!thread)
  		return;

  	per_cpu(softlockup_watchdog, cpu) = NULL;
  	kthread_stop(thread);
  }
4ff819515   Vasily Averin   watchdog: move wa...
469
470
  /* sysctl functions */
  #ifdef CONFIG_SYSCTL
58687acba   Don Zickus   lockup_detector: ...
471
472
473
  static void watchdog_enable_all_cpus(void)
  {
  	int cpu;
397357666   Marcin Slusarz   watchdog: Fix sys...
474
475
  
  	watchdog_enabled = 0;
58687acba   Don Zickus   lockup_detector: ...
476
477
  
  	for_each_online_cpu(cpu)
397357666   Marcin Slusarz   watchdog: Fix sys...
478
479
480
481
  		if (!watchdog_enable(cpu))
  			/* if any cpu succeeds, watchdog is considered
  			   enabled for the system */
  			watchdog_enabled = 1;
58687acba   Don Zickus   lockup_detector: ...
482

397357666   Marcin Slusarz   watchdog: Fix sys...
483
  	if (!watchdog_enabled)
58687acba   Don Zickus   lockup_detector: ...
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
  		printk(KERN_ERR "watchdog: failed to be enabled on some cpus
  ");
  
  }
  
  static void watchdog_disable_all_cpus(void)
  {
  	int cpu;
  
  	for_each_online_cpu(cpu)
  		watchdog_disable(cpu);
  
  	/* if all watchdogs are disabled, then they are disabled for the system */
  	watchdog_enabled = 0;
  }
58687acba   Don Zickus   lockup_detector: ...
499
  /*
586692a5a   Mandeep Singh Baines   watchdog: Disable...
500
   * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
58687acba   Don Zickus   lockup_detector: ...
501
   */
586692a5a   Mandeep Singh Baines   watchdog: Disable...
502
503
  int proc_dowatchdog(struct ctl_table *table, int write,
  		    void __user *buffer, size_t *lenp, loff_t *ppos)
58687acba   Don Zickus   lockup_detector: ...
504
  {
e04ab2bc4   Mandeep Singh Baines   watchdog: Only di...
505
  	int ret;
58687acba   Don Zickus   lockup_detector: ...
506

586692a5a   Mandeep Singh Baines   watchdog: Disable...
507
  	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
e04ab2bc4   Mandeep Singh Baines   watchdog: Only di...
508
509
  	if (ret || !write)
  		goto out;
586692a5a   Mandeep Singh Baines   watchdog: Disable...
510
  	if (watchdog_enabled && watchdog_thresh)
e04ab2bc4   Mandeep Singh Baines   watchdog: Only di...
511
512
513
514
515
516
  		watchdog_enable_all_cpus();
  	else
  		watchdog_disable_all_cpus();
  
  out:
  	return ret;
58687acba   Don Zickus   lockup_detector: ...
517
  }
58687acba   Don Zickus   lockup_detector: ...
518
519
520
521
522
523
524
525
526
527
528
529
530
531
  #endif /* CONFIG_SYSCTL */
  
  
  /*
   * Create/destroy watchdog threads as CPUs come and go.
   *
   * CPU notifier callback: @action is the hotplug event and @hcpu carries
   * the number of the CPU it applies to.  Always returns NOTIFY_OK.
   */
  static int __cpuinit
  cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
  {
  	int hotcpu = (unsigned long)hcpu;

  	switch (action) {
  	case CPU_UP_PREPARE:
  	case CPU_UP_PREPARE_FROZEN:
  		watchdog_prepare_cpu(hotcpu);
  		break;
  	case CPU_ONLINE:
  	case CPU_ONLINE_FROZEN:
  		if (watchdog_enabled)
  			watchdog_enable(hotcpu);
  		break;
  #ifdef CONFIG_HOTPLUG_CPU
  	case CPU_UP_CANCELED:
  	case CPU_UP_CANCELED_FROZEN:
  	case CPU_DEAD:
  	case CPU_DEAD_FROZEN:
  		watchdog_disable(hotcpu);
  		break;
  #endif /* CONFIG_HOTPLUG_CPU */
  	}

  	/*
  	 * hardlockup and softlockup are not important enough
  	 * to block cpu bring up.  Just always succeed and
  	 * rely on printk output to flag problems.
  	 */
  	return NOTIFY_OK;
  }
  
  /* notifier block that routes CPU hotplug events to cpu_callback() */
  static struct notifier_block __cpuinitdata cpu_nfb = {
  	.notifier_call	= cpu_callback,
  };
004417a6d   Peter Zijlstra   perf, arch: Clean...
562
  /*
   * Boot-time setup of the lockup detector: run the CPU_UP_PREPARE and
   * CPU_ONLINE steps by hand for the CPU executing this function, then
   * register the notifier so later hotplug events reach cpu_callback().
   */
  void __init lockup_detector_init(void)
  {
  	void *this_cpu = (void *)(long)smp_processor_id();
  	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, this_cpu);

  	WARN_ON(notifier_to_errno(err));

  	cpu_callback(&cpu_nfb, CPU_ONLINE, this_cpu);
  	register_cpu_notifier(&cpu_nfb);
  }