Blame view

kernel/watchdog.c 27.2 KB
58687acba   Don Zickus   lockup_detector: ...
1
2
3
4
5
  /*
   * Detect hard and soft lockups on a system
   *
   * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
   *
86f5e6a7b   Fernando Luis Vázquez Cao   watchdog: Fix cod...
6
7
8
   * Note: Most of this code is borrowed heavily from the original softlockup
   * detector, so thanks to Ingo for the initial implementation.
   * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
58687acba   Don Zickus   lockup_detector: ...
9
10
   * to those contributors as well.
   */
4501980aa   Andrew Morton   kernel/watchdog.c...
11
  #define pr_fmt(fmt) "NMI watchdog: " fmt
58687acba   Don Zickus   lockup_detector: ...
12
13
14
15
  #include <linux/mm.h>
  #include <linux/cpu.h>
  #include <linux/nmi.h>
  #include <linux/init.h>
58687acba   Don Zickus   lockup_detector: ...
16
17
  #include <linux/module.h>
  #include <linux/sysctl.h>
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
18
  #include <linux/smpboot.h>
8bd75c77b   Clark Williams   sched/rt: Move rt...
19
  #include <linux/sched/rt.h>
58687acba   Don Zickus   lockup_detector: ...
20
21
  
  #include <asm/irq_regs.h>
5d1c0f4a8   Eric B Munson   watchdog: add che...
22
  #include <linux/kvm_para.h>
58687acba   Don Zickus   lockup_detector: ...
23
  #include <linux/perf_event.h>
84d56e66b   Ulrich Obergfell   watchdog: new def...
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  /*
   * The run state of the lockup detectors is controlled by the content of the
   * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
   * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
   *
   * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
   * are variables that are only used as an 'interface' between the parameters
   * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
   * 'watchdog_thresh' variable is handled differently because its value is not
   * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
   * is equal zero.
   */
  #define NMI_WATCHDOG_ENABLED_BIT   0
  #define SOFT_WATCHDOG_ENABLED_BIT  1
  #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
  #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
40
  static DEFINE_MUTEX(watchdog_proc_mutex);
84d56e66b   Ulrich Obergfell   watchdog: new def...
41
42
43
44
45
46
47
48
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
  #else
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
  #endif
  int __read_mostly nmi_watchdog_enabled;
  int __read_mostly soft_watchdog_enabled;
  int __read_mostly watchdog_user_enabled;
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
49
  int __read_mostly watchdog_thresh = 10;
84d56e66b   Ulrich Obergfell   watchdog: new def...
50

ed235875e   Aaron Tomlin   kernel/watchdog.c...
51
52
53
54
55
  #ifdef CONFIG_SMP
  int __read_mostly sysctl_softlockup_all_cpu_backtrace;
  #else
  #define sysctl_softlockup_all_cpu_backtrace 0
  #endif
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
56
  static int __read_mostly watchdog_running;
0f34c4009   Chuansheng Liu   watchdog: store t...
57
  static u64 __read_mostly sample_period;
58687acba   Don Zickus   lockup_detector: ...
58
59
60
61
62
  
  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
  static DEFINE_PER_CPU(bool, softlockup_touch_sync);
58687acba   Don Zickus   lockup_detector: ...
63
  static DEFINE_PER_CPU(bool, soft_watchdog_warn);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
64
65
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
  static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
b1a8de1f5   chai wen   softlockup: make ...
66
  static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
23637d477   Frederic Weisbecker   lockup_detector: ...
67
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
cafcd80d2   Don Zickus   lockup_detector: ...
68
69
  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
58687acba   Don Zickus   lockup_detector: ...
70
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
942008b35   Colin Cross   hardlockup: detec...
71
72
73
74
75
  #endif
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
  static cpumask_t __read_mostly watchdog_cpus;
  #endif
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
58687acba   Don Zickus   lockup_detector: ...
76
77
  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  #endif
ed235875e   Aaron Tomlin   kernel/watchdog.c...
78
  static unsigned long soft_lockup_nmi_warn;
58687acba   Don Zickus   lockup_detector: ...
79

58687acba   Don Zickus   lockup_detector: ...
80
81
82
83
  /* boot commands */
  /*
   * Should we panic when a soft-lockup or hard-lockup occurs:
   */
23637d477   Frederic Weisbecker   lockup_detector: ...
84
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
fef2c9bc1   Don Zickus   kernel/watchdog.c...
85
86
  static int hardlockup_panic =
  			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
6e7458a6f   Ulrich Obergfell   kernel/watchdog.c...
87
88
89
90
91
92
93
94
  /*
   * We may not want to enable hard lockup detection by default in all cases,
   * for example when running the kernel as a guest on a hypervisor. In these
   * cases this function can be called to disable hard lockup detection. This
   * function should only be executed once by the boot processor before the
   * kernel command line parameters are parsed, because otherwise it is not
   * possible to override this in hardlockup_panic_setup().
   */
692297d8f   Ulrich Obergfell   watchdog: introdu...
95
void hardlockup_detector_disable(void)
{
	/* Clear only the hard lockup bit; the soft lockup detector is untouched. */
	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
}
58687acba   Don Zickus   lockup_detector: ...
99
100
101
102
  static int __init hardlockup_panic_setup(char *str)
  {
  	if (!strncmp(str, "panic", 5))
  		hardlockup_panic = 1;
fef2c9bc1   Don Zickus   kernel/watchdog.c...
103
104
  	else if (!strncmp(str, "nopanic", 7))
  		hardlockup_panic = 0;
5dc305587   Don Zickus   x86, NMI: Add bac...
105
  	else if (!strncmp(str, "0", 1))
195daf665   Ulrich Obergfell   watchdog: enable ...
106
107
108
  		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
  	else if (!strncmp(str, "1", 1))
  		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
58687acba   Don Zickus   lockup_detector: ...
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  	return 1;
  }
  __setup("nmi_watchdog=", hardlockup_panic_setup);
  #endif
  
/* Non-zero: panic (instead of just warning) when a soft lockup is detected. */
unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

/* Parse the "softlockup_panic=" boot argument (numeric, any base). */
static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);
  
/* "nowatchdog" boot argument: disable both the hard and soft lockup detectors. */
static int __init nowatchdog_setup(char *str)
{
	watchdog_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);
58687acba   Don Zickus   lockup_detector: ...
131
132
/* "nosoftlockup" boot argument: disable only the soft lockup detector. */
static int __init nosoftlockup_setup(char *str)
{
	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
195daf665   Ulrich Obergfell   watchdog: enable ...
137

ed235875e   Aaron Tomlin   kernel/watchdog.c...
138
139
140
141
142
143
144
145
146
  #ifdef CONFIG_SMP
  static int __init softlockup_all_cpu_backtrace_setup(char *str)
  {
  	sysctl_softlockup_all_cpu_backtrace =
  		!!simple_strtol(str, NULL, 0);
  	return 1;
  }
  __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
  #endif
58687acba   Don Zickus   lockup_detector: ...
147

4eec42f39   Mandeep Singh Baines   watchdog: Change ...
148
149
150
151
152
153
154
  /*
   * Hard-lockup warnings should be triggered after just a few seconds. Soft-
   * lockups can have false positives under extreme conditions. So we generally
   * want a higher threshold for soft lockups than for hard lockups. So we couple
   * the thresholds with a factor: we make the soft threshold twice the amount of
   * time the hard threshold is.
   */
6e9101aee   Ingo Molnar   watchdog: Fix non...
155
  static int get_softlockup_thresh(void)
4eec42f39   Mandeep Singh Baines   watchdog: Change ...
156
157
158
  {
  	return watchdog_thresh * 2;
  }
58687acba   Don Zickus   lockup_detector: ...
159
160
161
162
163
164
  
/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	/* running_clock() is in ns; shifting by 30 converts to ~seconds. */
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}
0f34c4009   Chuansheng Liu   watchdog: store t...
169
/* Recompute the hrtimer sample period from the current watchdog_thresh. */
static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}
  
/* Commands for resetting the watchdog */

/* Record "now" as the last time this CPU's softlockup watchdog was fed. */
static void __touch_watchdog(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
332fbdbca   Don Zickus   lockup_detector: ...
186
/*
 * Zero this CPU's touch timestamp; watchdog_timer_fn() treats a zero
 * timestamp as "re-sync and restart the softlockup measurement".
 */
void touch_softlockup_watchdog(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
58687acba   Don Zickus   lockup_detector: ...
195

332fbdbca   Don Zickus   lockup_detector: ...
196
/* Zero the touch timestamp of every online CPU's softlockup watchdog. */
void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * this is done lockless
	 * do we care if a 0 races with a timestamp?
	 * all it means is the softlock check starts one cycle later
	 */
	for_each_online_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
}
cafcd80d2   Don Zickus   lockup_detector: ...
208
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
58687acba   Don Zickus   lockup_detector: ...
209
210
  void touch_nmi_watchdog(void)
  {
62572e29b   Ben Zhang   kernel/watchdog.c...
211
212
213
214
215
216
217
  	/*
  	 * Using __raw here because some code paths have
  	 * preemption enabled.  If preemption is enabled
  	 * then interrupts should be enabled too, in which
  	 * case we shouldn't have to worry about the watchdog
  	 * going off.
  	 */
f7f66b05a   Christoph Lameter   watchdog: Replace...
218
  	raw_cpu_write(watchdog_nmi_touch, true);
332fbdbca   Don Zickus   lockup_detector: ...
219
  	touch_softlockup_watchdog();
58687acba   Don Zickus   lockup_detector: ...
220
221
  }
  EXPORT_SYMBOL(touch_nmi_watchdog);
cafcd80d2   Don Zickus   lockup_detector: ...
222
  #endif
58687acba   Don Zickus   lockup_detector: ...
223
224
/*
 * Touch the watchdog and request a scheduler-clock resync on this CPU:
 * watchdog_timer_fn() calls sched_clock_tick() when it sees the sync flag.
 */
void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}
942008b35   Colin Cross   hardlockup: detec...
228
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
58687acba   Don Zickus   lockup_detector: ...
229
  /* watchdog detector functions */
26e09c6ee   Don Zickus   lockup_detector: ...
230
  static int is_hardlockup(void)
58687acba   Don Zickus   lockup_detector: ...
231
  {
909ea9646   Christoph Lameter   core: Replace __g...
232
  	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
58687acba   Don Zickus   lockup_detector: ...
233

909ea9646   Christoph Lameter   core: Replace __g...
234
  	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
58687acba   Don Zickus   lockup_detector: ...
235
  		return 1;
909ea9646   Christoph Lameter   core: Replace __g...
236
  	__this_cpu_write(hrtimer_interrupts_saved, hrint);
58687acba   Don Zickus   lockup_detector: ...
237
238
239
  	return 0;
  }
  #endif
942008b35   Colin Cross   hardlockup: detec...
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
  static unsigned int watchdog_next_cpu(unsigned int cpu)
  {
  	cpumask_t cpus = watchdog_cpus;
  	unsigned int next_cpu;
  
  	next_cpu = cpumask_next(cpu, &cpus);
  	if (next_cpu >= nr_cpu_ids)
  		next_cpu = cpumask_first(&cpus);
  
  	if (next_cpu == cpu)
  		return nr_cpu_ids;
  
  	return next_cpu;
  }
  
  static int is_hardlockup_other_cpu(unsigned int cpu)
  {
  	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
  
  	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
  		return 1;
  
  	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
  	return 0;
  }
  
/*
 * Buddy-CPU hard lockup check: from this CPU's timer interrupt, verify that
 * the next CPU in watchdog_cpus is still taking its own timer interrupts.
 */
static void watchdog_check_hardlockup_other_cpu(void)
{
	unsigned int next_cpu;

	/*
	 * Test for hardlockups every 3 samples.  The sample period is
	 *  watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over
	 *  watchdog_thresh (over by 20%).
	 */
	if (__this_cpu_read(hrtimer_interrupts) % 3 != 0)
		return;

	/* check for a hardlockup on the next cpu */
	next_cpu = watchdog_next_cpu(smp_processor_id());
	if (next_cpu >= nr_cpu_ids)
		return;

	/* NOTE(review): presumably pairs with a write barrier on the touch side. */
	smp_rmb();

	/* A touched buddy gets one free pass instead of being checked. */
	if (per_cpu(watchdog_nmi_touch, next_cpu) == true) {
		per_cpu(watchdog_nmi_touch, next_cpu) = false;
		return;
	}

	if (is_hardlockup_other_cpu(next_cpu)) {
		/* only warn once */
		if (per_cpu(hard_watchdog_warn, next_cpu) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %u", next_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %u", next_cpu);

		per_cpu(hard_watchdog_warn, next_cpu) = true;
	} else {
		per_cpu(hard_watchdog_warn, next_cpu) = false;
	}
}
#else
static inline void watchdog_check_hardlockup_other_cpu(void) { return; }
#endif
26e09c6ee   Don Zickus   lockup_detector: ...
309
/*
 * Return the lockup duration (in ~seconds) if this CPU has gone longer than
 * the soft threshold without touching its watchdog timestamp, else 0.
 */
static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();

	if (watchdog_enabled & SOFT_WATCHDOG_ENABLED) {
		/* Warn about unreasonable delays. */
		if (time_after(now, touch_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}
942008b35   Colin Cross   hardlockup: detec...
320
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI

/*
 * Perf event template for the NMI watchdog: count CPU cycles, pinned to the
 * PMU, created disabled; the sample period is filled in at enable time.
 */
static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};
  
/* Callback function for perf event subsystem */
/* NOTE(review): perf overflow callbacks run in NMI-like context — keep it lean. */
static void watchdog_overflow_callback(struct perf_event *event,
		 struct perf_sample_data *data,
		 struct pt_regs *regs)
{
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	/* A recent touch_nmi_watchdog() grants one free pass. */
	if (__this_cpu_read(watchdog_nmi_touch) == true) {
		__this_cpu_write(watchdog_nmi_touch, false);
		return;
	}

	/* check for a hardlockup
	 * This is done by making sure our timer interrupt
	 * is incrementing.  The timer interrupt should have
	 * fired multiple times before we overflow'd.  If it hasn't
	 * then this is a good indication the cpu is stuck
	 */
	if (is_hardlockup()) {
		int this_cpu = smp_processor_id();

		/* only print hardlockups once */
		if (__this_cpu_read(hard_watchdog_warn) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %d",
			      this_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
			     this_cpu);

		__this_cpu_write(hard_watchdog_warn, true);
		return;
	}

	/* Timer is ticking again: re-arm the once-only warning. */
	__this_cpu_write(hard_watchdog_warn, false);
	return;
}
942008b35   Colin Cross   hardlockup: detec...
367
  #endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
368

58687acba   Don Zickus   lockup_detector: ...
369
370
/* Bump this CPU's hrtimer interrupt counter — the hardlockup "proof of life". */
static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
373
374
375
  
  static int watchdog_nmi_enable(unsigned int cpu);
  static void watchdog_nmi_disable(unsigned int cpu);
58687acba   Don Zickus   lockup_detector: ...
376
377
378
379
  
/* watchdog kicker functions */

/*
 * Per-CPU watchdog hrtimer callback: proves this CPU is taking timer
 * interrupts (for the hardlockup side), checks the buddy CPU, wakes the
 * softlockup watchdog thread, and reports a soft lockup when the thread
 * has not touched its timestamp within the soft threshold.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* test for hardlockups on the next cpu */
	watchdog_check_hardlockup_other_cpu();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	/* A zero timestamp means "touched": restart measurement from now. */
	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/* check for a softlockup
	 * This is done by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to
	 * indicate it is getting cpu time.  If it hasn't then
	 * this is a good indication some task is hogging the cpu
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup, check to see if the host
		 * stopped the vm before we issue the warning
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is already
			 * engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
484
485
486
/* Set the scheduling class/priority of the current (watchdog) task. */
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}
  
/*
 * Per-CPU bring-up: arm the watchdog hrtimer and perf event on @cpu and
 * raise the watchdog thread to real-time priority.
 */
static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}
58687acba   Don Zickus   lockup_detector: ...
508

bcd951cf1   Thomas Gleixner   watchdog: Use hot...
509
510
/*
 * Per-CPU teardown: drop the watchdog thread back to normal priority and
 * cancel the hrtimer and perf event. Mirrors watchdog_enable().
 */
static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}
b8900bc02   Frederic Weisbecker   watchdog: Registe...
518
519
520
521
/* Cleanup hook: same teardown regardless of @online (currently unused). */
static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
522
523
524
525
526
527
528
529
530
/* Run the watchdog thread only when a new hrtimer interrupt has arrived. */
static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}
  
/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp. If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	/* Acknowledge the interrupt that woke us and feed the soft watchdog. */
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();

	/*
	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
	 * failure path. Check for failures that can occur asynchronously -
	 * for example, when CPUs are on-lined - and shut down the hardware
	 * perf event on each CPU accordingly.
	 *
	 * The only non-obvious place this bit can be cleared is through
	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
	 * pr_info here would be too noisy as it would result in a message
	 * every few seconds if the hardlockup was disabled but the softlockup
	 * enabled.
	 */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		watchdog_nmi_disable(cpu);
}
58687acba   Don Zickus   lockup_detector: ...
557

942008b35   Colin Cross   hardlockup: detec...
558
#ifdef CONFIG_HARDLOCKUP_DETECTOR_NMI
/*
 * People like the simple clean cpu node info on boot.
 * Reduce the watchdog noise by only printing messages
 * that are different from what cpu0 displayed.
 */
static unsigned long cpu0_err;

/*
 * Create and enable the hardlockup perf event on @cpu.
 * Returns 0 on success (or when the hard detector is off); on failure the
 * NMI_WATCHDOG_ENABLED bit is cleared globally and the perf errno returned.
 */
static int watchdog_nmi_enable(unsigned int cpu)
{
	struct perf_event_attr *wd_attr;
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	/* nothing to do if the hard lockup detector is disabled */
	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
		goto out;

	/* is it already setup and enabled? */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		goto out;

	/* it is setup but not enabled */
	if (event != NULL)
		goto out_enable;

	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

	/* Try to register using hardware perf events */
	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);

	/* save cpu0 error for future comparision */
	if (cpu == 0 && IS_ERR(event))
		cpu0_err = PTR_ERR(event);

	if (!IS_ERR(event)) {
		/* only print for cpu0 or different than cpu0 */
		if (cpu == 0 || cpu0_err)
			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
		goto out_save;
	}

	/*
	 * Disable the hard lockup detector if _any_ CPU fails to set up
	 * set up the hardware perf event. The watchdog() function checks
	 * the NMI_WATCHDOG_ENABLED bit periodically.
	 *
	 * The barriers are for syncing up watchdog_enabled across all the
	 * cpus, as clear_bit() does not use barriers.
	 */
	smp_mb__before_atomic();
	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
	smp_mb__after_atomic();

	/* skip displaying the same error again */
	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
		return PTR_ERR(event);

	/* vary the KERN level based on the returned errno */
	if (PTR_ERR(event) == -EOPNOTSUPP)
		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
	else if (PTR_ERR(event) == -ENOENT)
		pr_warn("disabled (cpu%i): hardware events not enabled\n",
			 cpu);
	else
		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
			cpu, PTR_ERR(event));

	pr_info("Shutting down hard lockup detector on all cpus\n");

	return PTR_ERR(event);

	/* success path */
out_save:
	per_cpu(watchdog_ev, cpu) = event;
out_enable:
	perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
	return 0;
}
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
636
  static void watchdog_nmi_disable(unsigned int cpu)
58687acba   Don Zickus   lockup_detector: ...
637
638
639
640
641
642
643
644
645
646
  {
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	if (event) {
  		perf_event_disable(event);
  		per_cpu(watchdog_ev, cpu) = NULL;
  
  		/* should be in cleanup, but blocks oprofile */
  		perf_event_release_kernel(event);
  	}
df5771495   Ulrich Obergfell   watchdog: Fix pri...
647
648
649
650
  	if (cpu == 0) {
  		/* watchdog_nmi_enable() expects this to be zero initially. */
  		cpu0_err = 0;
  	}
58687acba   Don Zickus   lockup_detector: ...
651
  }
b3738d293   Stephane Eranian   watchdog: Add wat...
652
653
654
655
  
  void watchdog_nmi_enable_all(void)
  {
  	int cpu;
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
656
657
658
659
  	mutex_lock(&watchdog_proc_mutex);
  
  	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
  		goto unlock;
b3738d293   Stephane Eranian   watchdog: Add wat...
660
661
662
663
664
  
  	get_online_cpus();
  	for_each_online_cpu(cpu)
  		watchdog_nmi_enable(cpu);
  	put_online_cpus();
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
665
666
  
  unlock:
1173ff09b   Michal Hocko   watchdog: fix dou...
667
  	mutex_unlock(&watchdog_proc_mutex);
b3738d293   Stephane Eranian   watchdog: Add wat...
668
669
670
671
672
  }
  
  void watchdog_nmi_disable_all(void)
  {
  	int cpu;
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
673
  	mutex_lock(&watchdog_proc_mutex);
b3738d293   Stephane Eranian   watchdog: Add wat...
674
  	if (!watchdog_running)
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
675
  		goto unlock;
b3738d293   Stephane Eranian   watchdog: Add wat...
676
677
678
679
680
  
  	get_online_cpus();
  	for_each_online_cpu(cpu)
  		watchdog_nmi_disable(cpu);
  	put_online_cpus();
ab992dc38   Peter Zijlstra   watchdog: Fix mer...
681
682
683
  
  unlock:
  	mutex_unlock(&watchdog_proc_mutex);
b3738d293   Stephane Eranian   watchdog: Add wat...
684
  }
58687acba   Don Zickus   lockup_detector: ...
685
  #else
942008b35   Colin Cross   hardlockup: detec...
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
  #ifdef CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU
/* Start cross-cpu hardlockup checking of @cpu; always returns 0. */
static int watchdog_nmi_enable(unsigned int cpu)
{
	/*
	 * The new cpu will be marked online before the first hrtimer interrupt
	 * runs on it.  If another cpu tests for a hardlockup on the new cpu
	 * before it has run its first hrtimer, it will get a false positive.
	 * Touch the watchdog on the new cpu to delay the first check for at
	 * least 3 sampling periods to guarantee one hrtimer has run on the new
	 * cpu.
	 */
	per_cpu(watchdog_nmi_touch, cpu) = true;
	/* Make the touch visible before this cpu appears in the check mask. */
	smp_wmb();
	cpumask_set_cpu(cpu, &watchdog_cpus);
	return 0;
}
  
/* Stop cross-cpu hardlockup checking of @cpu (cpu going offline). */
static void watchdog_nmi_disable(unsigned int cpu)
{
	unsigned int next_cpu = watchdog_next_cpu(cpu);

	/*
	 * Offlining this cpu will cause the cpu before this one to start
	 * checking the one after this one.  If this cpu just finished checking
	 * the next cpu and updating hrtimer_interrupts_saved, and then the
	 * previous cpu checks it within one sample period, it will trigger a
	 * false positive.  Touch the watchdog on the next cpu to prevent it.
	 */
	if (next_cpu < nr_cpu_ids)
		per_cpu(watchdog_nmi_touch, next_cpu) = true;
	/* Make the touch visible before this cpu leaves the check mask. */
	smp_wmb();
	cpumask_clear_cpu(cpu, &watchdog_cpus);
}
  #else
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
720
721
  static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
  static void watchdog_nmi_disable(unsigned int cpu) { return; }
b3738d293   Stephane Eranian   watchdog: Add wat...
722
723
  void watchdog_nmi_enable_all(void) {}
  void watchdog_nmi_disable_all(void) {}
942008b35   Colin Cross   hardlockup: detec...
724
725
  #endif /* CONFIG_HARDLOCKUP_DETECTOR_OTHER_CPU */
  #endif /* CONFIG_HARDLOCKUP_DETECTOR_NMI */
58687acba   Don Zickus   lockup_detector: ...
726

b8900bc02   Frederic Weisbecker   watchdog: Registe...
727
728
729
730
731
732
733
734
735
736
/*
 * Per-cpu softlockup watchdog threads registered with the smpboot
 * infrastructure.  watchdog_enable() arms the per-cpu machinery both on
 * initial setup and on unpark; watchdog_disable() shuts it down on park.
 */
static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};
9809b18fc   Michal Hocko   watchdog: update ...
737
738
  static void restart_watchdog_hrtimer(void *info)
  {
f7f66b05a   Christoph Lameter   watchdog: Replace...
739
  	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
9809b18fc   Michal Hocko   watchdog: update ...
740
741
742
743
744
745
746
747
748
749
750
751
752
  	int ret;
  
  	/*
  	 * No need to cancel and restart hrtimer if it is currently executing
  	 * because it will reprogram itself with the new period now.
  	 * We should never see it unqueued here because we are running per-cpu
  	 * with interrupts disabled.
  	 */
  	ret = hrtimer_try_to_cancel(hrtimer);
  	if (ret == 1)
  		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
  				HRTIMER_MODE_REL_PINNED);
  }
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
753
  static void update_watchdog(int cpu)
9809b18fc   Michal Hocko   watchdog: update ...
754
  {
9809b18fc   Michal Hocko   watchdog: update ...
755
756
757
758
759
760
761
762
763
  	/*
  	 * Make sure that perf event counter will adopt to a new
  	 * sampling period. Updating the sampling period directly would
  	 * be much nicer but we do not have an API for that now so
  	 * let's use a big hammer.
  	 * Hrtimer will adopt the new period on the next tick but this
  	 * might be late already so we have to restart the timer as well.
  	 */
  	watchdog_nmi_disable(cpu);
e0a23b062   Frederic Weisbecker   watchdog: Simplif...
764
  	smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
9809b18fc   Michal Hocko   watchdog: update ...
765
766
  	watchdog_nmi_enable(cpu);
  }
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
767
  static void update_watchdog_all_cpus(void)
9809b18fc   Michal Hocko   watchdog: update ...
768
769
770
771
  {
  	int cpu;
  
  	get_online_cpus();
9809b18fc   Michal Hocko   watchdog: update ...
772
  	for_each_online_cpu(cpu)
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
773
  		update_watchdog(cpu);
9809b18fc   Michal Hocko   watchdog: update ...
774
775
  	put_online_cpus();
  }
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
776
  static int watchdog_enable_all_cpus(void)
58687acba   Don Zickus   lockup_detector: ...
777
  {
b8900bc02   Frederic Weisbecker   watchdog: Registe...
778
  	int err = 0;
58687acba   Don Zickus   lockup_detector: ...
779

3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
780
  	if (!watchdog_running) {
b8900bc02   Frederic Weisbecker   watchdog: Registe...
781
782
783
784
785
  		err = smpboot_register_percpu_thread(&watchdog_threads);
  		if (err)
  			pr_err("Failed to create watchdog threads, disabled
  ");
  		else
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
786
  			watchdog_running = 1;
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
787
788
789
790
791
792
  	} else {
  		/*
  		 * Enable/disable the lockup detectors or
  		 * change the sample period 'on the fly'.
  		 */
  		update_watchdog_all_cpus();
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
793
  	}
b8900bc02   Frederic Weisbecker   watchdog: Registe...
794
795
  
  	return err;
58687acba   Don Zickus   lockup_detector: ...
796
  }
b8900bc02   Frederic Weisbecker   watchdog: Registe...
797
798
799
  /* prepare/enable/disable routines */
  /* sysctl functions */
  #ifdef CONFIG_SYSCTL
58687acba   Don Zickus   lockup_detector: ...
800
801
  static void watchdog_disable_all_cpus(void)
  {
3c00ea82c   Frederic Weisbecker   watchdog: Rename ...
802
803
  	if (watchdog_running) {
  		watchdog_running = 0;
b8900bc02   Frederic Weisbecker   watchdog: Registe...
804
  		smpboot_unregister_percpu_thread(&watchdog_threads);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
805
  	}
58687acba   Don Zickus   lockup_detector: ...
806
  }
58687acba   Don Zickus   lockup_detector: ...
807
  /*
a0c9cbb93   Ulrich Obergfell   watchdog: introdu...
808
809
810
811
812
813
814
815
816
817
818
819
820
821
   * Update the run state of the lockup detectors.
   */
  static int proc_watchdog_update(void)
  {
  	int err = 0;
  
  	/*
  	 * Watchdog threads won't be started if they are already active.
  	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
  	 * care of this. If those threads are already active, the sample
  	 * period will be updated and the lockup detectors will be enabled
  	 * or disabled 'on the fly'.
  	 */
  	if (watchdog_enabled && watchdog_thresh)
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
822
  		err = watchdog_enable_all_cpus();
a0c9cbb93   Ulrich Obergfell   watchdog: introdu...
823
824
825
826
827
828
829
830
  	else
  		watchdog_disable_all_cpus();
  
  	return err;
  
  }
  
  /*
ef246a216   Ulrich Obergfell   watchdog: introdu...
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
   * common function for watchdog, nmi_watchdog and soft_watchdog parameter
   *
   * caller             | table->data points to | 'which' contains the flag(s)
   * -------------------|-----------------------|-----------------------------
   * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
   *                    |                       | with SOFT_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
   */
  static int proc_watchdog_common(int which, struct ctl_table *table, int write,
  				void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old, new;
  	int *watchdog_param = (int *)table->data;
  
  	mutex_lock(&watchdog_proc_mutex);
  
  	/*
  	 * If the parameter is being read return the state of the corresponding
  	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
  	 * run state of the lockup detectors.
  	 */
  	if (!write) {
  		*watchdog_param = (watchdog_enabled & which) != 0;
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	} else {
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  		if (err)
  			goto out;
  
  		/*
  		 * There is a race window between fetching the current value
  		 * from 'watchdog_enabled' and storing the new value. During
  		 * this race window, watchdog_nmi_enable() can sneak in and
  		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
  		 * The 'cmpxchg' detects this race and the loop retries.
  		 */
  		do {
  			old = watchdog_enabled;
  			/*
  			 * If the parameter value is not zero set the
  			 * corresponding bit(s), else clear it(them).
  			 */
  			if (*watchdog_param)
  				new = old | which;
  			else
  				new = old & ~which;
  		} while (cmpxchg(&watchdog_enabled, old, new) != old);
  
  		/*
  		 * Update the run state of the lockup detectors.
  		 * Restore 'watchdog_enabled' on failure.
  		 */
  		err = proc_watchdog_update();
  		if (err)
  			watchdog_enabled = old;
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	return err;
  }
  
  /*
83a80a390   Ulrich Obergfell   watchdog: introdu...
896
897
898
899
900
901
902
903
904
905
906
   * /proc/sys/kernel/watchdog
   */
  int proc_watchdog(struct ctl_table *table, int write,
  		  void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/nmi_watchdog
58687acba   Don Zickus   lockup_detector: ...
907
   */
83a80a390   Ulrich Obergfell   watchdog: introdu...
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
  int proc_nmi_watchdog(struct ctl_table *table, int write,
  		      void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/soft_watchdog
   */
  int proc_soft_watchdog(struct ctl_table *table, int write,
  			void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
58687acba   Don Zickus   lockup_detector: ...
924

83a80a390   Ulrich Obergfell   watchdog: introdu...
925
926
927
928
929
  /*
   * /proc/sys/kernel/watchdog_thresh
   */
  int proc_watchdog_thresh(struct ctl_table *table, int write,
  			 void __user *buffer, size_t *lenp, loff_t *ppos)
58687acba   Don Zickus   lockup_detector: ...
930
  {
83a80a390   Ulrich Obergfell   watchdog: introdu...
931
  	int err, old;
58687acba   Don Zickus   lockup_detector: ...
932

359e6fab6   Michal Hocko   watchdog: update ...
933
  	mutex_lock(&watchdog_proc_mutex);
bcd951cf1   Thomas Gleixner   watchdog: Use hot...
934

83a80a390   Ulrich Obergfell   watchdog: introdu...
935
  	old = ACCESS_ONCE(watchdog_thresh);
b8900bc02   Frederic Weisbecker   watchdog: Registe...
936
  	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
83a80a390   Ulrich Obergfell   watchdog: introdu...
937

b8900bc02   Frederic Weisbecker   watchdog: Registe...
938
  	if (err || !write)
359e6fab6   Michal Hocko   watchdog: update ...
939
  		goto out;
e04ab2bc4   Mandeep Singh Baines   watchdog: Only di...
940

b66a2356d   anish kumar   watchdog: Add com...
941
  	/*
83a80a390   Ulrich Obergfell   watchdog: introdu...
942
943
  	 * Update the sample period.
  	 * Restore 'watchdog_thresh' on failure.
b66a2356d   anish kumar   watchdog: Add com...
944
  	 */
83a80a390   Ulrich Obergfell   watchdog: introdu...
945
946
947
948
  	set_sample_period();
  	err = proc_watchdog_update();
  	if (err)
  		watchdog_thresh = old;
359e6fab6   Michal Hocko   watchdog: update ...
949
950
  out:
  	mutex_unlock(&watchdog_proc_mutex);
b8900bc02   Frederic Weisbecker   watchdog: Registe...
951
  	return err;
58687acba   Don Zickus   lockup_detector: ...
952
  }
58687acba   Don Zickus   lockup_detector: ...
953
  #endif /* CONFIG_SYSCTL */
004417a6d   Peter Zijlstra   perf, arch: Clean...
954
  void __init lockup_detector_init(void)
58687acba   Don Zickus   lockup_detector: ...
955
  {
0f34c4009   Chuansheng Liu   watchdog: store t...
956
  	set_sample_period();
b8900bc02   Frederic Weisbecker   watchdog: Registe...
957

195daf665   Ulrich Obergfell   watchdog: enable ...
958
  	if (watchdog_enabled)
b2f57c3a0   Ulrich Obergfell   watchdog: clean u...
959
  		watchdog_enable_all_cpus();
58687acba   Don Zickus   lockup_detector: ...
960
  }