kernel/watchdog.c

  /*
   * Detect hard and soft lockups on a system
   *
   * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
   *
   * Note: Most of this code is borrowed heavily from the original softlockup
   * detector, so thanks to Ingo for the initial implementation.
   * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
   * to those contributors as well.
   */
  #define pr_fmt(fmt) "NMI watchdog: " fmt
  #include <linux/mm.h>
  #include <linux/cpu.h>
  #include <linux/nmi.h>
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/sysctl.h>
  #include <linux/smpboot.h>
  #include <linux/sched/rt.h>
  #include <linux/tick.h>
  #include <linux/workqueue.h>
  
  #include <asm/irq_regs.h>
  #include <linux/kvm_para.h>
  #include <linux/perf_event.h>
  #include <linux/kthread.h>

  /*
   * The run state of the lockup detectors is controlled by the content of the
   * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
   * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
   *
   * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
   * are variables that are only used as an 'interface' between the parameters
   * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
   * 'watchdog_thresh' variable is handled differently because its value is not
   * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
 * is equal to zero.
   */
  #define NMI_WATCHDOG_ENABLED_BIT   0
  #define SOFT_WATCHDOG_ENABLED_BIT  1
  #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
  #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
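/*
 * Example: with both detectors active, watchdog_enabled ==
 * (NMI_WATCHDOG_ENABLED | SOFT_WATCHDOG_ENABLED) == 0x3; clearing bit 0
 * leaves only the soft lockup detector running.
 */
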
  static DEFINE_MUTEX(watchdog_proc_mutex);
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
  #else
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
  #endif
  int __read_mostly nmi_watchdog_enabled;
  int __read_mostly soft_watchdog_enabled;
  int __read_mostly watchdog_user_enabled;
  int __read_mostly watchdog_thresh = 10;

  #ifdef CONFIG_SMP
  int __read_mostly sysctl_softlockup_all_cpu_backtrace;
  int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
  #else
  #define sysctl_softlockup_all_cpu_backtrace 0
  #define sysctl_hardlockup_all_cpu_backtrace 0
  #endif
  static struct cpumask watchdog_cpumask __read_mostly;
  unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
  
  /* Helper for online, unparked cpus. */
  #define for_each_watchdog_cpu(cpu) \
  	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

  /*
   * The 'watchdog_running' variable is set to 1 when the watchdog threads
   * are registered/started and is set to 0 when the watchdog threads are
   * unregistered/stopped, so it is an indicator whether the threads exist.
   */
  static int __read_mostly watchdog_running;
  /*
   * If a subsystem has a need to deactivate the watchdog temporarily, it
   * can use the suspend/resume interface to achieve this. The content of
   * the 'watchdog_suspended' variable reflects this state. Existing threads
   * are parked/unparked by the lockup_detector_{suspend|resume} functions
   * (see comment blocks pertaining to those functions for further details).
   *
   * 'watchdog_suspended' also prevents threads from being registered/started
   * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
   * of 'watchdog_running' cannot change while the watchdog is deactivated
   * temporarily (see related code in 'proc' handlers).
   */
  static int __read_mostly watchdog_suspended;
  static u64 __read_mostly sample_period;
  
  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
  static DEFINE_PER_CPU(bool, softlockup_touch_sync);
  static DEFINE_PER_CPU(bool, soft_watchdog_warn);
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
  static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  #endif
  static unsigned long soft_lockup_nmi_warn;

  /* boot commands */
  /*
   * Should we panic when a soft-lockup or hard-lockup occurs:
   */
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  unsigned int __read_mostly hardlockup_panic =
  			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
  static unsigned long hardlockup_allcpu_dumped;
  /*
   * We may not want to enable hard lockup detection by default in all cases,
   * for example when running the kernel as a guest on a hypervisor. In these
   * cases this function can be called to disable hard lockup detection. This
   * function should only be executed once by the boot processor before the
   * kernel command line parameters are parsed, because otherwise it is not
   * possible to override this in hardlockup_panic_setup().
   */
  void hardlockup_detector_disable(void)
  {
  	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
  }
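
/*
 * Illustrative (hypothetical) caller: early guest-detection code that knows
 * the virtual PMU cannot deliver reliable NMIs might do something like:
 *
 *	if (running_on_hypervisor_without_pmu())	// hypothetical helper
 *		hardlockup_detector_disable();
 */
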
  static int __init hardlockup_panic_setup(char *str)
  {
  	if (!strncmp(str, "panic", 5))
  		hardlockup_panic = 1;
  	else if (!strncmp(str, "nopanic", 7))
  		hardlockup_panic = 0;
  	else if (!strncmp(str, "0", 1))
  		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
  	else if (!strncmp(str, "1", 1))
  		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
  	return 1;
  }
  __setup("nmi_watchdog=", hardlockup_panic_setup);
  #endif
  
  unsigned int __read_mostly softlockup_panic =
  			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  static int __init softlockup_panic_setup(char *str)
  {
  	softlockup_panic = simple_strtoul(str, NULL, 0);
  
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
  
  static int __init nowatchdog_setup(char *str)
  {
  	watchdog_enabled = 0;
  	return 1;
  }
  __setup("nowatchdog", nowatchdog_setup);
  static int __init nosoftlockup_setup(char *str)
  {
  	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);
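
/*
 * Summary of the boot parameters handled above:
 *   nmi_watchdog=panic|nopanic|0|1   hard lockup panic behaviour / enable state
 *   softlockup_panic=<0|1>           panic on soft lockup
 *   nowatchdog                       disable both lockup detectors
 *   nosoftlockup                     disable only the soft lockup detector
 */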

  #ifdef CONFIG_SMP
  static int __init softlockup_all_cpu_backtrace_setup(char *str)
  {
  	sysctl_softlockup_all_cpu_backtrace =
  		!!simple_strtol(str, NULL, 0);
  	return 1;
  }
  __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
  static int __init hardlockup_all_cpu_backtrace_setup(char *str)
  {
  	sysctl_hardlockup_all_cpu_backtrace =
  		!!simple_strtol(str, NULL, 0);
  	return 1;
  }
  __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
  #endif

  /*
   * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. The two
 * thresholds are coupled by a factor: the soft-lockup threshold is twice the
 * hard-lockup threshold.
   */
  static int get_softlockup_thresh(void)
  {
  	return watchdog_thresh * 2;
  }
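
/*
 * With the default watchdog_thresh of 10, hard lockups are reported after
 * roughly 10 seconds and soft lockups after 20 seconds.
 */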
  
  /*
   * Returns seconds, approximately.  We don't need nanosecond
   * resolution, and we don't need to waste time with a big divide when
   * 2^30ns == 1.074s.
   */
  static unsigned long get_timestamp(void)
  {
  	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
  }
  static void set_sample_period(void)
  {
  	/*
  	 * convert watchdog_thresh from seconds to ns
  	 * the divide by 5 is to give hrtimer several chances (two
  	 * or three with the current relation between the soft
  	 * and hard thresholds) to increment before the
  	 * hardlockup detector generates a warning
  	 */
  	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
  }
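
/*
 * Worked example with the defaults: watchdog_thresh = 10 gives a soft
 * lockup threshold of 20 s, so sample_period = 20 * NSEC_PER_SEC / 5,
 * i.e. the hrtimer fires every 4 seconds.
 */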
  
  /* Commands for resetting the watchdog */
  static void __touch_watchdog(void)
  {
  	__this_cpu_write(watchdog_touch_ts, get_timestamp());
  }
  /**
   * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
   *
   * Call when the scheduler may have stalled for legitimate reasons
   * preventing the watchdog task from executing - e.g. the scheduler
   * entering idle state.  This should only be used for scheduler events.
   * Use touch_softlockup_watchdog() for everything else.
   */
  void touch_softlockup_watchdog_sched(void)
  {
  	/*
  	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
  	 * gets zeroed here, so use the raw_ operation.
  	 */
  	raw_cpu_write(watchdog_touch_ts, 0);
  }
  
  void touch_softlockup_watchdog(void)
  {
  	touch_softlockup_watchdog_sched();
  	wq_watchdog_touch(raw_smp_processor_id());
  }
  EXPORT_SYMBOL(touch_softlockup_watchdog);

  void touch_all_softlockup_watchdogs(void)
  {
  	int cpu;
  
  	/*
  	 * this is done lockless
  	 * do we care if a 0 races with a timestamp?
  	 * all it means is the softlock check starts one cycle later
  	 */
  	for_each_watchdog_cpu(cpu)
  		per_cpu(watchdog_touch_ts, cpu) = 0;
  	wq_watchdog_touch(-1);
  }
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  void touch_nmi_watchdog(void)
  {
  	/*
  	 * Using __raw here because some code paths have
  	 * preemption enabled.  If preemption is enabled
  	 * then interrupts should be enabled too, in which
  	 * case we shouldn't have to worry about the watchdog
  	 * going off.
  	 */
  	raw_cpu_write(watchdog_nmi_touch, true);
  	touch_softlockup_watchdog();
  }
  EXPORT_SYMBOL(touch_nmi_watchdog);
  #endif
  void touch_softlockup_watchdog_sync(void)
  {
  	__this_cpu_write(softlockup_touch_sync, true);
  	__this_cpu_write(watchdog_touch_ts, 0);
  }
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  /* watchdog detector functions */
  static bool is_hardlockup(void)
  {
  	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

  	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
  		return true;

  	__this_cpu_write(hrtimer_interrupts_saved, hrint);
  	return false;
  }
  #endif
  static int is_softlockup(unsigned long touch_ts)
  {
  	unsigned long now = get_timestamp();

	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh) {
  		/* Warn about unreasonable delays. */
  		if (time_after(now, touch_ts + get_softlockup_thresh()))
  			return now - touch_ts;
  	}
  	return 0;
  }
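
/*
 * Example: with watchdog_thresh = 10, a CPU whose watchdog task last ran
 * 23 seconds ago makes is_softlockup() return 23, well past the 20 second
 * soft lockup threshold, and the caller prints a "stuck for 23s" warning.
 */
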
  #ifdef CONFIG_HARDLOCKUP_DETECTOR

  static struct perf_event_attr wd_hw_attr = {
  	.type		= PERF_TYPE_HARDWARE,
  	.config		= PERF_COUNT_HW_CPU_CYCLES,
  	.size		= sizeof(struct perf_event_attr),
  	.pinned		= 1,
  	.disabled	= 1,
  };
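
/*
 * The counter above counts CPU cycles; watchdog_nmi_enable() below sets
 * .sample_period via hw_nmi_get_sample_period(watchdog_thresh), so the
 * resulting PMI fires roughly once per watchdog_thresh seconds on a busy CPU.
 */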
  
  /* Callback function for perf event subsystem */
  static void watchdog_overflow_callback(struct perf_event *event,
  		 struct perf_sample_data *data,
  		 struct pt_regs *regs)
  {
  	/* Ensure the watchdog never gets throttled */
  	event->hw.interrupts = 0;
  	if (__this_cpu_read(watchdog_nmi_touch) == true) {
  		__this_cpu_write(watchdog_nmi_touch, false);
  		return;
  	}
  
  	/* check for a hardlockup
  	 * This is done by making sure our timer interrupt
  	 * is incrementing.  The timer interrupt should have
  	 * fired multiple times before we overflow'd.  If it hasn't
  	 * then this is a good indication the cpu is stuck
  	 */
  	if (is_hardlockup()) {
  		int this_cpu = smp_processor_id();
  		/* only print hardlockups once */
  		if (__this_cpu_read(hard_watchdog_warn) == true)
  			return;
  		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  
  		/*
  		 * Perform all-CPU dump only once to avoid multiple hardlockups
  		 * generating interleaving traces
  		 */
  		if (sysctl_hardlockup_all_cpu_backtrace &&
  				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
  			trigger_allbutself_cpu_backtrace();
  
  		if (hardlockup_panic)
  			nmi_panic(regs, "Hard LOCKUP");

  		__this_cpu_write(hard_watchdog_warn, true);
  		return;
  	}
  	__this_cpu_write(hard_watchdog_warn, false);
  	return;
  }
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
  static void watchdog_interrupt_count(void)
  {
  	__this_cpu_inc(hrtimer_interrupts);
  }
  
  static int watchdog_nmi_enable(unsigned int cpu);
  static void watchdog_nmi_disable(unsigned int cpu);

  static int watchdog_enable_all_cpus(void);
  static void watchdog_disable_all_cpus(void);
  /* watchdog kicker functions */
  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  {
  	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
  	struct pt_regs *regs = get_irq_regs();
  	int duration;
  	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
  
  	/* kick the hardlockup detector */
  	watchdog_interrupt_count();
  
  	/* kick the softlockup detector */
  	wake_up_process(__this_cpu_read(softlockup_watchdog));
  
  	/* .. and repeat */
  	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
  
  	if (touch_ts == 0) {
  		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
  			/*
  			 * If the time stamp was touched atomically
  			 * make sure the scheduler tick is up to date.
  			 */
  			__this_cpu_write(softlockup_touch_sync, false);
  			sched_clock_tick();
  		}
  
  		/* Clear the guest paused flag on watchdog reset */
  		kvm_check_and_clear_guest_paused();
  		__touch_watchdog();
  		return HRTIMER_RESTART;
  	}
  
  	/* check for a softlockup
  	 * This is done by making sure a high priority task is
  	 * being scheduled.  The task touches the watchdog to
  	 * indicate it is getting cpu time.  If it hasn't then
  	 * this is a good indication some task is hogging the cpu
  	 */
  	duration = is_softlockup(touch_ts);
  	if (unlikely(duration)) {
  		/*
  		 * If a virtual machine is stopped by the host it can look to
  		 * the watchdog like a soft lockup, check to see if the host
  		 * stopped the vm before we issue the warning
  		 */
  		if (kvm_check_and_clear_guest_paused())
  			return HRTIMER_RESTART;
  		/* only warn once */
  		if (__this_cpu_read(soft_watchdog_warn) == true) {
  			/*
  			 * When multiple processes are causing softlockups the
  			 * softlockup detector only warns on the first one
  			 * because the code relies on a full quiet cycle to
  			 * re-arm.  The second process prevents the quiet cycle
  			 * and never gets reported.  Use task pointers to detect
  			 * this.
  			 */
  			if (__this_cpu_read(softlockup_task_ptr_saved) !=
  			    current) {
  				__this_cpu_write(soft_watchdog_warn, false);
  				__touch_watchdog();
  			}
  			return HRTIMER_RESTART;
  		}

  		if (softlockup_all_cpu_backtrace) {
  			/* Prevent multiple soft-lockup reports if one cpu is already
  			 * engaged in dumping cpu back traces
  			 */
  			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
  				/* Someone else will report us. Let's give up */
  				__this_cpu_write(soft_watchdog_warn, true);
  				return HRTIMER_RESTART;
  			}
  		}
		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
  			smp_processor_id(), duration,
  			current->comm, task_pid_nr(current));
  		__this_cpu_write(softlockup_task_ptr_saved, current);
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  		if (softlockup_all_cpu_backtrace) {
  			/* Avoid generating two back traces for current
  			 * given that one is already made above
  			 */
  			trigger_allbutself_cpu_backtrace();
  
  			clear_bit(0, &soft_lockup_nmi_warn);
  			/* Barrier to sync with other cpus */
  			smp_mb__after_atomic();
  		}
  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
  		if (softlockup_panic)
  			panic("softlockup: hung tasks");
  		__this_cpu_write(soft_watchdog_warn, true);
  	} else
  		__this_cpu_write(soft_watchdog_warn, false);
  
  	return HRTIMER_RESTART;
  }
  static void watchdog_set_prio(unsigned int policy, unsigned int prio)
  {
  	struct sched_param param = { .sched_priority = prio };

  	sched_setscheduler(current, policy, &param);
  }
  
  static void watchdog_enable(unsigned int cpu)
  {
  	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

  	/* kick off the timer for the hardlockup detector */
  	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	hrtimer->function = watchdog_timer_fn;
  	/* Enable the perf event */
  	watchdog_nmi_enable(cpu);

  	/* done here because hrtimer_start can only pin to smp_processor_id() */
  	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
  		      HRTIMER_MODE_REL_PINNED);
  	/* initialize timestamp */
  	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
  	__touch_watchdog();
  }

  static void watchdog_disable(unsigned int cpu)
  {
  	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

  	watchdog_set_prio(SCHED_NORMAL, 0);
  	hrtimer_cancel(hrtimer);
  	/* disable the perf event */
  	watchdog_nmi_disable(cpu);
  }
  static void watchdog_cleanup(unsigned int cpu, bool online)
  {
  	watchdog_disable(cpu);
  }
  static int watchdog_should_run(unsigned int cpu)
  {
  	return __this_cpu_read(hrtimer_interrupts) !=
  		__this_cpu_read(soft_lockup_hrtimer_cnt);
  }
  
  /*
   * The watchdog thread function - touches the timestamp.
   *
   * It only runs once every sample_period seconds (4 seconds by
   * default) to reset the softlockup timestamp. If this gets delayed
   * for more than 2*watchdog_thresh seconds then the debug-printout
   * triggers in watchdog_timer_fn().
   */
  static void watchdog(unsigned int cpu)
  {
  	__this_cpu_write(soft_lockup_hrtimer_cnt,
  			 __this_cpu_read(hrtimer_interrupts));
  	__touch_watchdog();
  
  	/*
  	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
  	 * failure path. Check for failures that can occur asynchronously -
  	 * for example, when CPUs are on-lined - and shut down the hardware
  	 * perf event on each CPU accordingly.
  	 *
  	 * The only non-obvious place this bit can be cleared is through
  	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
  	 * pr_info here would be too noisy as it would result in a message
  	 * every few seconds if the hardlockup was disabled but the softlockup
  	 * enabled.
  	 */
  	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
  		watchdog_nmi_disable(cpu);
  }

  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  /*
   * People like the simple clean cpu node info on boot.
   * Reduce the watchdog noise by only printing messages
   * that are different from what cpu0 displayed.
   */
  static unsigned long cpu0_err;
  static int watchdog_nmi_enable(unsigned int cpu)
  {
  	struct perf_event_attr *wd_attr;
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  	/* nothing to do if the hard lockup detector is disabled */
  	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
  		goto out;

  	/* is it already setup and enabled? */
  	if (event && event->state > PERF_EVENT_STATE_OFF)
  		goto out;
  
  	/* it is setup but not enabled */
  	if (event != NULL)
  		goto out_enable;
  	wd_attr = &wd_hw_attr;
  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
  
  	/* Try to register using hardware perf events */
  	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
  
	/* save cpu0 error for future comparison */
  	if (cpu == 0 && IS_ERR(event))
  		cpu0_err = PTR_ERR(event);
  	if (!IS_ERR(event)) {
  		/* only print for cpu0 or different than cpu0 */
  		if (cpu == 0 || cpu0_err)
			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
  		goto out_save;
  	}
  	/*
	 * Disable the hard lockup detector if _any_ CPU fails to set up
	 * the hardware perf event. The watchdog() function checks
  	 * the NMI_WATCHDOG_ENABLED bit periodically.
  	 *
  	 * The barriers are for syncing up watchdog_enabled across all the
  	 * cpus, as clear_bit() does not use barriers.
  	 */
  	smp_mb__before_atomic();
  	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
  	smp_mb__after_atomic();
  	/* skip displaying the same error again */
  	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
  		return PTR_ERR(event);
  
  	/* vary the KERN level based on the returned errno */
  	if (PTR_ERR(event) == -EOPNOTSUPP)
		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
  	else if (PTR_ERR(event) == -ENOENT)
		pr_warn("disabled (cpu%i): hardware events not enabled\n",
  			 cpu);
  	else
		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
  			cpu, PTR_ERR(event));
  
	pr_info("Shutting down hard lockup detector on all cpus\n");
  	return PTR_ERR(event);
  
  	/* success path */
  out_save:
  	per_cpu(watchdog_ev, cpu) = event;
  out_enable:
  	perf_event_enable(per_cpu(watchdog_ev, cpu));
  out:
  	return 0;
  }
  static void watchdog_nmi_disable(unsigned int cpu)
  {
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	if (event) {
  		perf_event_disable(event);
  		per_cpu(watchdog_ev, cpu) = NULL;
  
  		/* should be in cleanup, but blocks oprofile */
  		perf_event_release_kernel(event);
  	}
  	if (cpu == 0) {
  		/* watchdog_nmi_enable() expects this to be zero initially. */
  		cpu0_err = 0;
  	}
  }

  #else
  static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
  static void watchdog_nmi_disable(unsigned int cpu) { return; }
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */

  static struct smp_hotplug_thread watchdog_threads = {
  	.store			= &softlockup_watchdog,
  	.thread_should_run	= watchdog_should_run,
  	.thread_fn		= watchdog,
  	.thread_comm		= "watchdog/%u",
  	.setup			= watchdog_enable,
  	.cleanup		= watchdog_cleanup,
  	.park			= watchdog_disable,
  	.unpark			= watchdog_enable,
  };
  /*
   * park all watchdog threads that are specified in 'watchdog_cpumask'
   *
   * This function returns an error if kthread_park() of a watchdog thread
   * fails. In this situation, the watchdog threads of some CPUs can already
   * be parked and the watchdog threads of other CPUs can still be runnable.
   * Callers are expected to handle this special condition as appropriate in
   * their context.
   *
   * This function may only be called in a context that is protected against
   * races with CPU hotplug - for example, via get_online_cpus().
   */
  static int watchdog_park_threads(void)
  {
  	int cpu, ret = 0;
  	for_each_watchdog_cpu(cpu) {
  		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
  		if (ret)
  			break;
  	}
  
  	return ret;
  }
  
  /*
   * unpark all watchdog threads that are specified in 'watchdog_cpumask'
   *
   * This function may only be called in a context that is protected against
   * races with CPU hotplug - for example, via get_online_cpus().
   */
  static void watchdog_unpark_threads(void)
  {
  	int cpu;
  	for_each_watchdog_cpu(cpu)
  		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
  }
  /*
   * Suspend the hard and soft lockup detector by parking the watchdog threads.
   */
  int lockup_detector_suspend(void)
  {
  	int ret = 0;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  	/*
  	 * Multiple suspend requests can be active in parallel (counted by
  	 * the 'watchdog_suspended' variable). If the watchdog threads are
  	 * running, the first caller takes care that they will be parked.
  	 * The state of 'watchdog_running' cannot change while a suspend
  	 * request is active (see related code in 'proc' handlers).
  	 */
  	if (watchdog_running && !watchdog_suspended)
  		ret = watchdog_park_threads();
  
  	if (ret == 0)
  		watchdog_suspended++;
  	else {
  		watchdog_disable_all_cpus();
		pr_err("Failed to suspend lockup detectors, disabled\n");
  		watchdog_enabled = 0;
  	}
  
  	mutex_unlock(&watchdog_proc_mutex);
  
  	return ret;
  }
  
  /*
   * Resume the hard and soft lockup detector by unparking the watchdog threads.
   */
  void lockup_detector_resume(void)
  {
  	mutex_lock(&watchdog_proc_mutex);
  
  	watchdog_suspended--;
  	/*
  	 * The watchdog threads are unparked if they were previously running
  	 * and if there is no more active suspend request.
  	 */
  	if (watchdog_running && !watchdog_suspended)
  		watchdog_unpark_threads();
  
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  }
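
/*
 * Typical (sketched) usage by a subsystem that must keep the watchdog quiet
 * across a long-running operation:
 *
 *	if (!lockup_detector_suspend()) {
 *		do_long_operation();		// hypothetical work
 *		lockup_detector_resume();
 *	}
 */
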
  static int update_watchdog_all_cpus(void)
  {
  	int ret;
  
  	ret = watchdog_park_threads();
  	if (ret)
  		return ret;
  	watchdog_unpark_threads();
  
  	return 0;
  }
  static int watchdog_enable_all_cpus(void)
  {
  	int err = 0;

  	if (!watchdog_running) {
  		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
  							     &watchdog_cpumask);
  		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
  		else
  			watchdog_running = 1;
  	} else {
  		/*
  		 * Enable/disable the lockup detectors or
  		 * change the sample period 'on the fly'.
  		 */
  		err = update_watchdog_all_cpus();
  
  		if (err) {
  			watchdog_disable_all_cpus();
			pr_err("Failed to update lockup detectors, disabled\n");
  		}
  	}

  	if (err)
  		watchdog_enabled = 0;
  	return err;
  }
  
  static void watchdog_disable_all_cpus(void)
  {
  	if (watchdog_running) {
  		watchdog_running = 0;
  		smpboot_unregister_percpu_thread(&watchdog_threads);
  	}
  }
  #ifdef CONFIG_SYSCTL
  /*
   * Update the run state of the lockup detectors.
   */
  static int proc_watchdog_update(void)
  {
  	int err = 0;
  
  	/*
  	 * Watchdog threads won't be started if they are already active.
  	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
  	 * care of this. If those threads are already active, the sample
  	 * period will be updated and the lockup detectors will be enabled
  	 * or disabled 'on the fly'.
  	 */
  	if (watchdog_enabled && watchdog_thresh)
  		err = watchdog_enable_all_cpus();
  	else
  		watchdog_disable_all_cpus();
  
  	return err;
  
  }
  
  /*
   * common function for watchdog, nmi_watchdog and soft_watchdog parameter
   *
   * caller             | table->data points to | 'which' contains the flag(s)
   * -------------------|-----------------------|-----------------------------
   * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
   *                    |                       | with SOFT_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
   */
  static int proc_watchdog_common(int which, struct ctl_table *table, int write,
  				void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old, new;
  	int *watchdog_param = (int *)table->data;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	/*
  	 * If the parameter is being read return the state of the corresponding
  	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
  	 * run state of the lockup detectors.
  	 */
  	if (!write) {
  		*watchdog_param = (watchdog_enabled & which) != 0;
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	} else {
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  		if (err)
  			goto out;
  
  		/*
  		 * There is a race window between fetching the current value
  		 * from 'watchdog_enabled' and storing the new value. During
  		 * this race window, watchdog_nmi_enable() can sneak in and
  		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
  		 * The 'cmpxchg' detects this race and the loop retries.
  		 */
  		do {
  			old = watchdog_enabled;
  			/*
  			 * If the parameter value is not zero set the
  			 * corresponding bit(s), else clear it(them).
  			 */
  			if (*watchdog_param)
  				new = old | which;
  			else
  				new = old & ~which;
  		} while (cmpxchg(&watchdog_enabled, old, new) != old);
  
  		/*
  		 * Update the run state of the lockup detectors. There is _no_
  		 * need to check the value returned by proc_watchdog_update()
  		 * and to restore the previous value of 'watchdog_enabled' as
  		 * both lockup detectors are disabled if proc_watchdog_update()
  		 * returns an error.
  		 */
  		if (old == new)
  			goto out;
  		err = proc_watchdog_update();
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
  
  /*
   * /proc/sys/kernel/watchdog
   */
  int proc_watchdog(struct ctl_table *table, int write,
  		  void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/nmi_watchdog
   */
  int proc_nmi_watchdog(struct ctl_table *table, int write,
  		      void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/soft_watchdog
   */
  int proc_soft_watchdog(struct ctl_table *table, int write,
  			void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
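
/*
 * Example shell usage of the knobs above (integer values are written):
 *
 *	echo 0 > /proc/sys/kernel/nmi_watchdog    # hard lockup detector off
 *	echo 1 > /proc/sys/kernel/soft_watchdog   # soft lockup detector on
 *	echo 0 > /proc/sys/kernel/watchdog        # both detectors off
 */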

  /*
   * /proc/sys/kernel/watchdog_thresh
   */
  int proc_watchdog_thresh(struct ctl_table *table, int write,
  			 void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old, new;

  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);

  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	old = ACCESS_ONCE(watchdog_thresh);
  	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

  	if (err || !write)
  		goto out;

  	/*
  	 * Update the sample period. Restore on failure.
  	 */
  	new = ACCESS_ONCE(watchdog_thresh);
  	if (old == new)
  		goto out;
  	set_sample_period();
  	err = proc_watchdog_update();
  	if (err) {
  		watchdog_thresh = old;
  		set_sample_period();
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
  
  /*
   * The cpumask is the mask of possible cpus that the watchdog can run
   * on, not the mask of cpus it is actually running on.  This allows the
   * user to specify a mask that will include cpus that have not yet
   * been brought online, if desired.
   */
  int proc_watchdog_cpumask(struct ctl_table *table, int write,
  			  void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  
  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
  	if (!err && write) {
  		/* Remove impossible cpus to keep sysctl output cleaner. */
  		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
  			    cpu_possible_mask);
  
  		if (watchdog_running) {
  			/*
  			 * Failure would be due to being unable to allocate
  			 * a temporary cpumask, so we are likely not in a
  			 * position to do much else to make things better.
  			 */
  			if (smpboot_update_cpumask_percpu_thread(
  				    &watchdog_threads, &watchdog_cpumask) != 0)
				pr_err("cpumask update failed\n");
  		}
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
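
/*
 * Example: restricting the watchdog to CPUs 0-3 (cpulist range format is
 * accepted by proc_do_large_bitmap):
 *
 *	echo 0-3 > /proc/sys/kernel/watchdog_cpumask
 */
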
  #endif /* CONFIG_SYSCTL */
  void __init lockup_detector_init(void)
  {
  	set_sample_period();

  #ifdef CONFIG_NO_HZ_FULL
  	if (tick_nohz_full_enabled()) {
		pr_info("Disabling watchdog on nohz_full cores by default\n");
  		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
  	} else
  		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
  #else
  	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
  #endif
  	if (watchdog_enabled)
  		watchdog_enable_all_cpus();
  }