Blame view

kernel/watchdog.c 15.6 KB
58687acba   Don Zickus   lockup_detector: ...
1
2
3
4
5
  /*
   * Detect hard and soft lockups on a system
   *
   * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
   *
86f5e6a7b   Fernando Luis Vázquez Cao   watchdog: Fix cod...
6
7
8
   * Note: Most of this code is borrowed heavily from the original softlockup
   * detector, so thanks to Ingo for the initial implementation.
   * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
58687acba   Don Zickus   lockup_detector: ...
9
10
   * to those contributors as well.
   */
4501980aa   Andrew Morton   kernel/watchdog.c...
11
  #define pr_fmt(fmt) "NMI watchdog: " fmt
58687acba   Don Zickus   lockup_detector: ...
12
13
14
15
16
17
18
19
20
21
22
  #include <linux/mm.h>
  #include <linux/cpu.h>
  #include <linux/nmi.h>
  #include <linux/init.h>
  #include <linux/delay.h>
  #include <linux/freezer.h>
  #include <linux/kthread.h>
  #include <linux/lockdep.h>
  #include <linux/notifier.h>
  #include <linux/module.h>
  #include <linux/sysctl.h>
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
23
  #include <linux/smpboot.h>
8bd75c77b   Clark Williams   sched/rt: Move rt...
24
  #include <linux/sched/rt.h>
58687acba   Don Zickus   lockup_detector: ...
25
26
  
  #include <asm/irq_regs.h>
5d1c0f4a8   Eric B Munson   watchdog: add che...
27
  #include <linux/kvm_para.h>
58687acba   Don Zickus   lockup_detector: ...
28
  #include <linux/perf_event.h>
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
29
  int watchdog_user_enabled = 1;
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
30
  int __read_mostly watchdog_thresh = 10;
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
31
  static int __read_mostly watchdog_running;
0f34c4009   Chuansheng Liu   watchdog: store t...
32
  static u64 __read_mostly sample_period;
58687acba   Don Zickus   lockup_detector: ...
33
34
35
36
37
  
  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
  static DEFINE_PER_CPU(bool, softlockup_touch_sync);
58687acba   Don Zickus   lockup_detector: ...
38
  static DEFINE_PER_CPU(bool, soft_watchdog_warn);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
39
40
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
  static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
23637d477   Frederic Weisbecker   lockup_detector: ...
41
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
cafcd80d2   Don Zickus   lockup_detector: ...
42
43
  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
58687acba   Don Zickus   lockup_detector: ...
44
45
46
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  #endif
58687acba   Don Zickus   lockup_detector: ...
47
48
49
50
  /* boot commands */
  /*
   * Should we panic when a soft-lockup or hard-lockup occurs:
   */
23637d477   Frederic Weisbecker   lockup_detector: ...
51
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
fef2c9bc1   Don Zickus   kernel/watchdog.c...
52
53
  static int hardlockup_panic =
  			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
58687acba   Don Zickus   lockup_detector: ...
54
55
56
57
58
  
  /*
   * Handler for the "nmi_watchdog=" boot parameter.
   *
   * A value beginning with "panic" sets hardlockup_panic, one beginning with
   * "nopanic" clears it, and one beginning with "0" clears
   * watchdog_user_enabled.  Anything else is silently ignored.
   * Always returns 1.
   */
  static int __init hardlockup_panic_setup(char *str)
  {
  	if (!strncmp(str, "panic", 5))
  		hardlockup_panic = 1;
  	else if (!strncmp(str, "nopanic", 7))
  		hardlockup_panic = 0;
  	else if (!strncmp(str, "0", 1))
  		watchdog_user_enabled = 0;
  	return 1;
  }
  __setup("nmi_watchdog=", hardlockup_panic_setup);
  #endif
  
  unsigned int __read_mostly softlockup_panic =
  			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  /*
   * Handler for the "softlockup_panic=" boot parameter: parses @str with
   * simple_strtoul() (base argument 0) and stores the result in
   * softlockup_panic.  Always returns 1.
   */
  static int __init softlockup_panic_setup(char *str)
  {
  	unsigned long parsed = simple_strtoul(str, NULL, 0);

  	softlockup_panic = parsed;
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
  
  /*
   * Handler for the "nowatchdog" boot parameter: clears
   * watchdog_user_enabled.  @str is not examined.  Always returns 1.
   */
  static int __init nowatchdog_setup(char *str)
  {
  	watchdog_user_enabled = 0;
  	return 1;
  }
  __setup("nowatchdog", nowatchdog_setup);
  
  /* deprecated */
  /*
   * Handler for the "nosoftlockup" boot parameter: clears
   * watchdog_user_enabled, exactly like "nowatchdog".  @str is not examined.
   * Always returns 1.
   */
  static int __init nosoftlockup_setup(char *str)
  {
  	watchdog_user_enabled = 0;
  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);
  /*  */
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
94
95
96
97
98
99
100
  /*
   * Hard-lockup warnings should be triggered after just a few seconds. Soft-
   * lockups can have false positives under extreme conditions. So we generally
   * want a higher threshold for soft lockups than for hard lockups. So we couple
   * the thresholds with a factor: we make the soft threshold twice the amount of
   * time the hard threshold is.
   */
6e9101aee   Ingo Molnar   watchdog: Fix non...
101
  static int get_softlockup_thresh(void)
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
102
103
104
  {
  	return watchdog_thresh * 2;
  }
58687acba   Don Zickus   lockup_detector: ...
105
106
107
108
109
110
  
  /*
   * Returns seconds, approximately.  We don't need nanosecond
   * resolution, and we don't need to waste time with a big divide when
   * 2^30ns == 1.074s.
   */
c06b4f194   Namhyung Kim   watchdog: Use loc...
111
  static unsigned long get_timestamp(void)
  {
  	/* shift instead of dividing by 10^9: 2^30 ~= 10^9 */
  	return local_clock() >> 30LL;
  }
0f34c4009   Chuansheng Liu   watchdog: store t...
115
  static void set_sample_period(void)
58687acba   Don Zickus   lockup_detector: ...
116
117
  {
  	/*
586692a5a   Mandeep Singh Baines   watchdog: Disable...
118
  	 * convert watchdog_thresh from seconds to ns
86f5e6a7b   Fernando Luis Vázquez Cao   watchdog: Fix cod...
119
120
121
122
  	 * the divide by 5 is to give hrtimer several chances (two
  	 * or three with the current relation between the soft
  	 * and hard thresholds) to increment before the
  	 * hardlockup detector generates a warning
58687acba   Don Zickus   lockup_detector: ...
123
  	 */
0f34c4009   Chuansheng Liu   watchdog: store t...
124
  	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
58687acba   Don Zickus   lockup_detector: ...
125
126
127
128
129
  }
  
  /* Commands for resetting the watchdog */
  static void __touch_watchdog(void)
  {
c06b4f194   Namhyung Kim   watchdog: Use loc...
130
  	__this_cpu_write(watchdog_touch_ts, get_timestamp());
58687acba   Don Zickus   lockup_detector: ...
131
  }
332fbdbca   Don Zickus   lockup_detector: ...
132
  void touch_softlockup_watchdog(void)
58687acba   Don Zickus   lockup_detector: ...
133
  {
909ea9646   Christoph Lameter   core: Replace __g...
134
  	__this_cpu_write(watchdog_touch_ts, 0);
58687acba   Don Zickus   lockup_detector: ...
135
  }
0167c7819   Ingo Molnar   watchdog: Export ...
136
  EXPORT_SYMBOL(touch_softlockup_watchdog);
58687acba   Don Zickus   lockup_detector: ...
137

332fbdbca   Don Zickus   lockup_detector: ...
138
  void touch_all_softlockup_watchdogs(void)
58687acba   Don Zickus   lockup_detector: ...
139
140
141
142
143
144
145
146
147
148
149
  {
  	int cpu;
  
  	/*
  	 * this is done lockless
  	 * do we care if a 0 races with a timestamp?
  	 * all it means is the softlock check starts one cycle later
  	 */
  	for_each_online_cpu(cpu)
  		per_cpu(watchdog_touch_ts, cpu) = 0;
  }
cafcd80d2   Don Zickus   lockup_detector: ...
150
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
151
152
  void touch_nmi_watchdog(void)
  {
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
153
  	if (watchdog_user_enabled) {
68d3f1d81   Don Zickus   lockup_detector: ...
154
155
156
157
158
159
160
  		unsigned cpu;
  
  		for_each_present_cpu(cpu) {
  			if (per_cpu(watchdog_nmi_touch, cpu) != true)
  				per_cpu(watchdog_nmi_touch, cpu) = true;
  		}
  	}
332fbdbca   Don Zickus   lockup_detector: ...
161
  	touch_softlockup_watchdog();
58687acba   Don Zickus   lockup_detector: ...
162
163
  }
  EXPORT_SYMBOL(touch_nmi_watchdog);
cafcd80d2   Don Zickus   lockup_detector: ...
164
  #endif
58687acba   Don Zickus   lockup_detector: ...
165
166
167
168
169
  /*
   * Like touch_softlockup_watchdog(), but additionally sets
   * softlockup_touch_sync so that watchdog_timer_fn() calls
   * sched_clock_tick() before re-stamping the timestamp.
   */
  void touch_softlockup_watchdog_sync(void)
  {
  	/* set the sync request before zeroing the timestamp */
  	__raw_get_cpu_var(softlockup_touch_sync) = true;
  	__raw_get_cpu_var(watchdog_touch_ts) = 0;
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
170
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
171
  /* watchdog detector functions */
26e09c6ee   Don Zickus   lockup_detector: ...
172
  static int is_hardlockup(void)
58687acba   Don Zickus   lockup_detector: ...
173
  {
909ea9646   Christoph Lameter   core: Replace __g...
174
  	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
58687acba   Don Zickus   lockup_detector: ...
175

909ea9646   Christoph Lameter   core: Replace __g...
176
  	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
58687acba   Don Zickus   lockup_detector: ...
177
  		return 1;
909ea9646   Christoph Lameter   core: Replace __g...
178
  	__this_cpu_write(hrtimer_interrupts_saved, hrint);
58687acba   Don Zickus   lockup_detector: ...
179
180
181
  	return 0;
  }
  #endif
26e09c6ee   Don Zickus   lockup_detector: ...
182
  /*
   * Returns the time elapsed since @touch_ts (in get_timestamp() units) when
   * it exceeds the soft-lockup threshold, 0 otherwise.
   */
  static int is_softlockup(unsigned long touch_ts)
  {
  	unsigned long now = get_timestamp();

  	/* Warn about unreasonable delays: */
  	if (time_after(now, touch_ts + get_softlockup_thresh()))
  		return now - touch_ts;

  	return 0;
  }
23637d477   Frederic Weisbecker   lockup_detector: ...
192
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
1880c4ae1   Cyrill Gorcunov   perf, x86: Add hw...
193

58687acba   Don Zickus   lockup_detector: ...
194
195
196
197
198
199
200
201
202
  /*
   * Attribute template for the hard-lockup perf event: a pinned, initially
   * disabled hardware CPU-cycles counter.  watchdog_nmi_enable() fills in
   * sample_period before creating the event.
   */
  static struct perf_event_attr wd_hw_attr = {
  	.type		= PERF_TYPE_HARDWARE,
  	.config		= PERF_COUNT_HW_CPU_CYCLES,
  	.size		= sizeof(struct perf_event_attr),
  	.pinned		= 1,
  	.disabled	= 1,
  };
  
  /* Callback function for perf event subsystem */
a8b0ca17b   Peter Zijlstra   perf: Remove the ...
203
  static void watchdog_overflow_callback(struct perf_event *event,
58687acba   Don Zickus   lockup_detector: ...
204
205
206
  		 struct perf_sample_data *data,
  		 struct pt_regs *regs)
  {
c6db67cda   Peter Zijlstra   watchdog: Don't t...
207
208
  	/* Ensure the watchdog never gets throttled */
  	event->hw.interrupts = 0;
909ea9646   Christoph Lameter   core: Replace __g...
209
210
  	if (__this_cpu_read(watchdog_nmi_touch) == true) {
  		__this_cpu_write(watchdog_nmi_touch, false);
58687acba   Don Zickus   lockup_detector: ...
211
212
213
214
215
216
217
218
219
  		return;
  	}
  
  	/* check for a hardlockup
  	 * This is done by making sure our timer interrupt
  	 * is incrementing.  The timer interrupt should have
  	 * fired multiple times before we overflow'd.  If it hasn't
  	 * then this is a good indication the cpu is stuck
  	 */
26e09c6ee   Don Zickus   lockup_detector: ...
220
221
  	if (is_hardlockup()) {
  		int this_cpu = smp_processor_id();
58687acba   Don Zickus   lockup_detector: ...
222
  		/* only print hardlockups once */
909ea9646   Christoph Lameter   core: Replace __g...
223
  		if (__this_cpu_read(hard_watchdog_warn) == true)
58687acba   Don Zickus   lockup_detector: ...
224
225
226
227
228
229
  			return;
  
  		if (hardlockup_panic)
  			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  		else
  			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
909ea9646   Christoph Lameter   core: Replace __g...
230
  		__this_cpu_write(hard_watchdog_warn, true);
58687acba   Don Zickus   lockup_detector: ...
231
232
  		return;
  	}
909ea9646   Christoph Lameter   core: Replace __g...
233
  	__this_cpu_write(hard_watchdog_warn, false);
58687acba   Don Zickus   lockup_detector: ...
234
235
  	return;
  }
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
236
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
58687acba   Don Zickus   lockup_detector: ...
237
238
  static void watchdog_interrupt_count(void)
  {
909ea9646   Christoph Lameter   core: Replace __g...
239
  	__this_cpu_inc(hrtimer_interrupts);
58687acba   Don Zickus   lockup_detector: ...
240
  }
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
241
242
243
  
  static int watchdog_nmi_enable(unsigned int cpu);
  static void watchdog_nmi_disable(unsigned int cpu);
58687acba   Don Zickus   lockup_detector: ...
244
245
246
247
  
  /* watchdog kicker functions */
  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  {
909ea9646   Christoph Lameter   core: Replace __g...
248
  	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
58687acba   Don Zickus   lockup_detector: ...
249
250
251
252
253
254
255
  	struct pt_regs *regs = get_irq_regs();
  	int duration;
  
  	/* kick the hardlockup detector */
  	watchdog_interrupt_count();
  
  	/* kick the softlockup detector */
909ea9646   Christoph Lameter   core: Replace __g...
256
  	wake_up_process(__this_cpu_read(softlockup_watchdog));
58687acba   Don Zickus   lockup_detector: ...
257
258
  
  	/* .. and repeat */
0f34c4009   Chuansheng Liu   watchdog: store t...
259
  	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
58687acba   Don Zickus   lockup_detector: ...
260
261
  
  	if (touch_ts == 0) {
909ea9646   Christoph Lameter   core: Replace __g...
262
  		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
58687acba   Don Zickus   lockup_detector: ...
263
264
265
266
  			/*
  			 * If the time stamp was touched atomically
  			 * make sure the scheduler tick is up to date.
  			 */
909ea9646   Christoph Lameter   core: Replace __g...
267
  			__this_cpu_write(softlockup_touch_sync, false);
58687acba   Don Zickus   lockup_detector: ...
268
269
  			sched_clock_tick();
  		}
5d1c0f4a8   Eric B Munson   watchdog: add che...
270
271
272
  
  		/* Clear the guest paused flag on watchdog reset */
  		kvm_check_and_clear_guest_paused();
58687acba   Don Zickus   lockup_detector: ...
273
274
275
276
277
278
279
280
281
282
  		__touch_watchdog();
  		return HRTIMER_RESTART;
  	}
  
  	/* check for a softlockup
  	 * This is done by making sure a high priority task is
  	 * being scheduled.  The task touches the watchdog to
  	 * indicate it is getting cpu time.  If it hasn't then
  	 * this is a good indication some task is hogging the cpu
  	 */
26e09c6ee   Don Zickus   lockup_detector: ...
283
  	duration = is_softlockup(touch_ts);
58687acba   Don Zickus   lockup_detector: ...
284
  	if (unlikely(duration)) {
5d1c0f4a8   Eric B Munson   watchdog: add che...
285
286
287
288
289
290
291
  		/*
  		 * If a virtual machine is stopped by the host it can look to
  		 * the watchdog like a soft lockup, check to see if the host
  		 * stopped the vm before we issue the warning
  		 */
  		if (kvm_check_and_clear_guest_paused())
  			return HRTIMER_RESTART;
58687acba   Don Zickus   lockup_detector: ...
292
  		/* only warn once */
909ea9646   Christoph Lameter   core: Replace __g...
293
  		if (__this_cpu_read(soft_watchdog_warn) == true)
58687acba   Don Zickus   lockup_detector: ...
294
  			return HRTIMER_RESTART;
b0f4c4b32   Prarit Bhargava   bugs, x86: Fix pr...
295
296
  		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]
  ",
26e09c6ee   Don Zickus   lockup_detector: ...
297
  			smp_processor_id(), duration,
58687acba   Don Zickus   lockup_detector: ...
298
299
300
301
302
303
304
305
306
307
  			current->comm, task_pid_nr(current));
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  
  		if (softlockup_panic)
  			panic("softlockup: hung tasks");
909ea9646   Christoph Lameter   core: Replace __g...
308
  		__this_cpu_write(soft_watchdog_warn, true);
58687acba   Don Zickus   lockup_detector: ...
309
  	} else
909ea9646   Christoph Lameter   core: Replace __g...
310
  		__this_cpu_write(soft_watchdog_warn, false);
58687acba   Don Zickus   lockup_detector: ...
311
312
313
  
  	return HRTIMER_RESTART;
  }
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
314
315
316
  static void watchdog_set_prio(unsigned int policy, unsigned int prio)
  {
  	struct sched_param param = { .sched_priority = prio };
58687acba   Don Zickus   lockup_detector: ...
317

bcd951cf1   Thomas Gleixner   watchdog: Use hot...
318
319
320
321
  	sched_setscheduler(current, policy, &param);
  }
  
  static void watchdog_enable(unsigned int cpu)
58687acba   Don Zickus   lockup_detector: ...
322
  {
26e09c6ee   Don Zickus   lockup_detector: ...
323
  	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
58687acba   Don Zickus   lockup_detector: ...
324

3935e8950   Bjørn Mork   watchdog: Fix dis...
325
326
327
  	/* kick off the timer for the hardlockup detector */
  	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	hrtimer->function = watchdog_timer_fn;
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
328
329
  	/* Enable the perf event */
  	watchdog_nmi_enable(cpu);
58687acba   Don Zickus   lockup_detector: ...
330

58687acba   Don Zickus   lockup_detector: ...
331
  	/* done here because hrtimer_start can only pin to smp_processor_id() */
0f34c4009   Chuansheng Liu   watchdog: store t...
332
  	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
58687acba   Don Zickus   lockup_detector: ...
333
  		      HRTIMER_MODE_REL_PINNED);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
334
335
336
337
  	/* initialize timestamp */
  	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
  	__touch_watchdog();
  }
58687acba   Don Zickus   lockup_detector: ...
338

bcd951cf1   Thomas Gleixner   watchdog: Use hot...
339
340
341
  static void watchdog_disable(unsigned int cpu)
  {
  	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
58687acba   Don Zickus   lockup_detector: ...
342

bcd951cf1   Thomas Gleixner   watchdog: Use hot...
343
344
345
346
  	watchdog_set_prio(SCHED_NORMAL, 0);
  	hrtimer_cancel(hrtimer);
  	/* disable the perf event */
  	watchdog_nmi_disable(cpu);
58687acba   Don Zickus   lockup_detector: ...
347
  }
b8900bc02   Frederic Weisbecker   watchdog: Registe...
348
349
350
351
  /*
   * ->cleanup callback of watchdog_threads: disables the watchdog for @cpu.
   * @online is not used.
   */
  static void watchdog_cleanup(unsigned int cpu, bool online)
  {
  	watchdog_disable(cpu);
  }
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
352
353
354
355
356
357
358
359
360
  /*
   * ->thread_should_run callback of watchdog_threads: non-zero when the
   * hrtimer tick count differs from the count the watchdog thread last
   * recorded in soft_lockup_hrtimer_cnt.  @cpu is not used.
   */
  static int watchdog_should_run(unsigned int cpu)
  {
  	unsigned long ticks = __this_cpu_read(hrtimer_interrupts);
  	unsigned long handled = __this_cpu_read(soft_lockup_hrtimer_cnt);

  	return ticks != handled;
  }
  
  /*
   * The watchdog thread function - touches the timestamp.
   *
   * It only runs once every sample_period (4 seconds by
   * default) to reset the softlockup timestamp. If this gets delayed
   * for more than 2*watchdog_thresh seconds then the debug-printout
   * triggers in watchdog_timer_fn().
   */
  static void watchdog(unsigned int cpu)
  {
  	/* remember which tick we handled so watchdog_should_run() goes quiet */
  	__this_cpu_write(soft_lockup_hrtimer_cnt,
  			 __this_cpu_read(hrtimer_interrupts));
  	__touch_watchdog();
  }
58687acba   Don Zickus   lockup_detector: ...
372

23637d477   Frederic Weisbecker   lockup_detector: ...
373
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
a70270468   Don Zickus   watchdog: Quiet d...
374
375
376
377
378
379
  /*
   * People like the simple clean cpu node info on boot.
   * Reduce the watchdog noise by only printing messages
   * that are different from what cpu0 displayed.
   */
  static unsigned long cpu0_err;
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
380
  static int watchdog_nmi_enable(unsigned int cpu)
58687acba   Don Zickus   lockup_detector: ...
381
382
383
384
385
386
387
388
389
390
391
  {
  	struct perf_event_attr *wd_attr;
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	/* is it already setup and enabled? */
  	if (event && event->state > PERF_EVENT_STATE_OFF)
  		goto out;
  
  	/* it is setup but not enabled */
  	if (event != NULL)
  		goto out_enable;
58687acba   Don Zickus   lockup_detector: ...
392
  	wd_attr = &wd_hw_attr;
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
393
  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
1880c4ae1   Cyrill Gorcunov   perf, x86: Add hw...
394
395
  
  	/* Try to register using hardware perf events */
4dc0da869   Avi Kivity   perf: Add context...
396
  	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
a70270468   Don Zickus   watchdog: Quiet d...
397
398
399
400
  
  	/* save cpu0 error for future comparision */
  	if (cpu == 0 && IS_ERR(event))
  		cpu0_err = PTR_ERR(event);
58687acba   Don Zickus   lockup_detector: ...
401
  	if (!IS_ERR(event)) {
a70270468   Don Zickus   watchdog: Quiet d...
402
403
404
405
  		/* only print for cpu0 or different than cpu0 */
  		if (cpu == 0 || cpu0_err)
  			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.
  ");
58687acba   Don Zickus   lockup_detector: ...
406
407
  		goto out_save;
  	}
a70270468   Don Zickus   watchdog: Quiet d...
408
409
410
  	/* skip displaying the same error again */
  	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
  		return PTR_ERR(event);
5651f7f47   Don Zickus   watchdog, nmi: Lo...
411
412
413
  
  	/* vary the KERN level based on the returned errno */
  	if (PTR_ERR(event) == -EOPNOTSUPP)
4501980aa   Andrew Morton   kernel/watchdog.c...
414
415
  		pr_info("disabled (cpu%i): not supported (no LAPIC?)
  ", cpu);
5651f7f47   Don Zickus   watchdog, nmi: Lo...
416
  	else if (PTR_ERR(event) == -ENOENT)
4501980aa   Andrew Morton   kernel/watchdog.c...
417
418
419
  		pr_warning("disabled (cpu%i): hardware events not enabled
  ",
  			 cpu);
5651f7f47   Don Zickus   watchdog, nmi: Lo...
420
  	else
4501980aa   Andrew Morton   kernel/watchdog.c...
421
422
423
  		pr_err("disabled (cpu%i): unable to create perf event: %ld
  ",
  			cpu, PTR_ERR(event));
eac243355   Akinobu Mita   lockup_detector: ...
424
  	return PTR_ERR(event);
58687acba   Don Zickus   lockup_detector: ...
425
426
427
428
429
430
431
432
433
  
  	/* success path */
  out_save:
  	per_cpu(watchdog_ev, cpu) = event;
  out_enable:
  	perf_event_enable(per_cpu(watchdog_ev, cpu));
  out:
  	return 0;
  }
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
434
  static void watchdog_nmi_disable(unsigned int cpu)
58687acba   Don Zickus   lockup_detector: ...
435
436
437
438
439
440
441
442
443
444
445
446
447
  {
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	if (event) {
  		perf_event_disable(event);
  		per_cpu(watchdog_ev, cpu) = NULL;
  
  		/* should be in cleanup, but blocks oprofile */
  		perf_event_release_kernel(event);
  	}
  	return;
  }
  #else
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
448
449
  /* No-op stubs for builds without CONFIG_HARDLOCKUP_DETECTOR. */
  static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
  static void watchdog_nmi_disable(unsigned int cpu) { return; }
23637d477   Frederic Weisbecker   lockup_detector: ...
450
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
58687acba   Don Zickus   lockup_detector: ...
451

b8900bc02   Frederic Weisbecker   watchdog: Registe...
452
453
454
455
456
457
458
459
460
461
  /*
   * Descriptor of the per-cpu "watchdog/%u" threads, registered through
   * smpboot_register_percpu_thread() in watchdog_enable_all_cpus().  The
   * task pointer of each thread is stored in softlockup_watchdog.
   */
  static struct smp_hotplug_thread watchdog_threads = {
  	.store			= &softlockup_watchdog,
  	.thread_should_run	= watchdog_should_run,
  	.thread_fn		= watchdog,
  	.thread_comm		= "watchdog/%u",
  	/* setup and unpark share one routine, as do park and cleanup */
  	.setup			= watchdog_enable,
  	.cleanup		= watchdog_cleanup,
  	.park			= watchdog_disable,
  	.unpark			= watchdog_enable,
  };
9809b18fc   Michal Hocko   watchdog: update ...
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
  /*
   * Cross-call target used by update_timers(): restart this cpu's watchdog
   * hrtimer so that it picks up the current sample_period.  @info is unused.
   */
  static void restart_watchdog_hrtimer(void *info)
  {
  	struct hrtimer *timer = &__raw_get_cpu_var(watchdog_hrtimer);

  	/*
  	 * Only a timer that was queued and successfully cancelled needs to be
  	 * started again.  One that is currently executing reprograms itself
  	 * with the new period, and we should never find it unqueued because
  	 * we run per-cpu with interrupts disabled.
  	 */
  	if (hrtimer_try_to_cancel(timer) != 1)
  		return;

  	hrtimer_start(timer, ns_to_ktime(sample_period),
  		      HRTIMER_MODE_REL_PINNED);
  }
  
  static void update_timers(int cpu)
  {
  	struct call_single_data data = {.func = restart_watchdog_hrtimer};
  	/*
  	 * Make sure that perf event counter will adopt to a new
  	 * sampling period. Updating the sampling period directly would
  	 * be much nicer but we do not have an API for that now so
  	 * let's use a big hammer.
  	 * Hrtimer will adopt the new period on the next tick but this
  	 * might be late already so we have to restart the timer as well.
  	 */
  	watchdog_nmi_disable(cpu);
  	__smp_call_function_single(cpu, &data, 1);
  	watchdog_nmi_enable(cpu);
  }
  
  static void update_timers_all_cpus(void)
  {
  	int cpu;
  
  	get_online_cpus();
  	preempt_disable();
  	for_each_online_cpu(cpu)
  		update_timers(cpu);
  	preempt_enable();
  	put_online_cpus();
  }
  
  /*
   * Start the watchdog threads if they are not running yet.  When they
   * already run and @sample_period_changed is true, refresh the timers on
   * all online cpus instead.
   *
   * Returns 0 on success or the error from smpboot_register_percpu_thread().
   */
  static int watchdog_enable_all_cpus(bool sample_period_changed)
  {
  	int err = 0;

  	if (!watchdog_running) {
  		err = smpboot_register_percpu_thread(&watchdog_threads);
  		if (err)
  			pr_err("Failed to create watchdog threads, disabled\n");
  		else
  			watchdog_running = 1;
  	} else if (sample_period_changed) {
  		update_timers_all_cpus();
  	}

  	return err;
  }
b8900bc02   Frederic Weisbecker   watchdog: Registe...
524
525
526
  /* prepare/enable/disable routines */
  /* sysctl functions */
  #ifdef CONFIG_SYSCTL
58687acba   Don Zickus   lockup_detector: ...
527
528
  static void watchdog_disable_all_cpus(void)
  {
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
529
530
  	if (watchdog_running) {
  		watchdog_running = 0;
b8900bc02   Frederic Weisbecker   watchdog: Registe...
531
  		smpboot_unregister_percpu_thread(&watchdog_threads);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
532
  	}
58687acba   Don Zickus   lockup_detector: ...
533
  }
58687acba   Don Zickus   lockup_detector: ...
534
  /*
586692a5a   Mandeep Singh Baines   watchdog: Disable...
535
   * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
58687acba   Don Zickus   lockup_detector: ...
536
   */
586692a5a   Mandeep Singh Baines   watchdog: Disable...
537
538
  /*
   * Writes are serialized by a function-local mutex.  After a successful
   * write the sample period is recomputed and the watchdog is enabled or
   * disabled to match the new settings.  If enabling fails, the previous
   * values of watchdog_thresh and watchdog_user_enabled are restored.
   *
   * Returns 0 on success or a negative error code.
   */
  int proc_dowatchdog(struct ctl_table *table, int write,
  		    void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old_thresh, old_enabled;
  	static DEFINE_MUTEX(watchdog_proc_mutex);

  	mutex_lock(&watchdog_proc_mutex);
  	old_thresh = ACCESS_ONCE(watchdog_thresh);
  	old_enabled = ACCESS_ONCE(watchdog_user_enabled);

  	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	if (err || !write)
  		goto out;

  	set_sample_period();
  	/*
  	 * Watchdog threads shouldn't be enabled if they are
  	 * disabled. The 'watchdog_running' variable check in
  	 * watchdog_*_all_cpus() function takes care of this.
  	 */
  	if (watchdog_user_enabled && watchdog_thresh)
  		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
  	else
  		watchdog_disable_all_cpus();

  	/* Restore old values on failure */
  	if (err) {
  		watchdog_thresh = old_thresh;
  		watchdog_user_enabled = old_enabled;
  		/* keep sample_period consistent with the restored threshold */
  		set_sample_period();
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	return err;
  }
58687acba   Don Zickus   lockup_detector: ...
570
  #endif /* CONFIG_SYSCTL */
004417a6d   Peter Zijlstra   perf, arch: Clean...
571
  /*
   * Boot-time initialization: compute the sample period and, unless the
   * watchdog was disabled by the user, start the watchdog threads.
   */
  void __init lockup_detector_init(void)
  {
  	set_sample_period();

  	if (watchdog_user_enabled)
  		watchdog_enable_all_cpus(false);
  }