kernel/watchdog.c
  /*
   * Detect hard and soft lockups on a system
   *
   * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
   *
   * Note: Most of this code is borrowed heavily from the original softlockup
   * detector, so thanks to Ingo for the initial implementation.
   * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
   * to those contributors as well.
   */
  #define pr_fmt(fmt) "NMI watchdog: " fmt
  #include <linux/mm.h>
  #include <linux/cpu.h>
  #include <linux/nmi.h>
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/sysctl.h>
  #include <linux/smpboot.h>
  #include <linux/sched/rt.h>
  #include <linux/tick.h>
  #include <linux/workqueue.h>
  
  #include <asm/irq_regs.h>
  #include <linux/kvm_para.h>
  #include <linux/perf_event.h>
  #include <linux/kthread.h>

  /*
   * The run state of the lockup detectors is controlled by the content of the
   * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
   * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
   *
   * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
   * are variables that are only used as an 'interface' between the parameters
   * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
   * 'watchdog_thresh' variable is handled differently because its value is not
   * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
 * is equal to zero.
   */
  #define NMI_WATCHDOG_ENABLED_BIT   0
  #define SOFT_WATCHDOG_ENABLED_BIT  1
  #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
  #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
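
/*
 * For example (a minimal sketch; the 'proc' handlers further down update the
 * variable with cmpxchg() rather than plain stores like these):
 *
 *	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
 *		pr_info("hard lockup detector is enabled\n");
 *	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;	(clears only bit 1)
 */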
  static DEFINE_MUTEX(watchdog_proc_mutex);
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
  #else
  static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
  #endif
  int __read_mostly nmi_watchdog_enabled;
  int __read_mostly soft_watchdog_enabled;
  int __read_mostly watchdog_user_enabled;
  int __read_mostly watchdog_thresh = 10;

  #ifdef CONFIG_SMP
  int __read_mostly sysctl_softlockup_all_cpu_backtrace;
  int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
  #else
  #define sysctl_softlockup_all_cpu_backtrace 0
  #define sysctl_hardlockup_all_cpu_backtrace 0
  #endif
  static struct cpumask watchdog_cpumask __read_mostly;
  unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
  
  /* Helper for online, unparked cpus. */
  #define for_each_watchdog_cpu(cpu) \
  	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)

  /*
   * The 'watchdog_running' variable is set to 1 when the watchdog threads
   * are registered/started and is set to 0 when the watchdog threads are
   * unregistered/stopped, so it is an indicator whether the threads exist.
   */
  static int __read_mostly watchdog_running;
  /*
   * If a subsystem has a need to deactivate the watchdog temporarily, it
   * can use the suspend/resume interface to achieve this. The content of
   * the 'watchdog_suspended' variable reflects this state. Existing threads
   * are parked/unparked by the lockup_detector_{suspend|resume} functions
   * (see comment blocks pertaining to those functions for further details).
   *
   * 'watchdog_suspended' also prevents threads from being registered/started
   * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
   * of 'watchdog_running' cannot change while the watchdog is deactivated
   * temporarily (see related code in 'proc' handlers).
   */
  static int __read_mostly watchdog_suspended;
  static u64 __read_mostly sample_period;
  
  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
  static DEFINE_PER_CPU(bool, softlockup_touch_sync);
  static DEFINE_PER_CPU(bool, soft_watchdog_warn);
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
  static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
  static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  static DEFINE_PER_CPU(bool, hard_watchdog_warn);
  static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
  #endif
  static unsigned long soft_lockup_nmi_warn;

  /* boot commands */
  /*
   * Should we panic when a soft-lockup or hard-lockup occurs:
   */
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  unsigned int __read_mostly hardlockup_panic =
  			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
  static unsigned long hardlockup_allcpu_dumped;
  /*
   * We may not want to enable hard lockup detection by default in all cases,
   * for example when running the kernel as a guest on a hypervisor. In these
   * cases this function can be called to disable hard lockup detection. This
   * function should only be executed once by the boot processor before the
   * kernel command line parameters are parsed, because otherwise it is not
   * possible to override this in hardlockup_panic_setup().
   */
  void hardlockup_detector_disable(void)
  {
  	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
  }
  static int __init hardlockup_panic_setup(char *str)
  {
  	if (!strncmp(str, "panic", 5))
  		hardlockup_panic = 1;
  	else if (!strncmp(str, "nopanic", 7))
  		hardlockup_panic = 0;
  	else if (!strncmp(str, "0", 1))
  		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
  	else if (!strncmp(str, "1", 1))
  		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
  	return 1;
  }
  __setup("nmi_watchdog=", hardlockup_panic_setup);
  #endif
  
  unsigned int __read_mostly softlockup_panic =
  			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
  
  static int __init softlockup_panic_setup(char *str)
  {
  	softlockup_panic = simple_strtoul(str, NULL, 0);
  
  	return 1;
  }
  __setup("softlockup_panic=", softlockup_panic_setup);
  
  static int __init nowatchdog_setup(char *str)
  {
  	watchdog_enabled = 0;
  	return 1;
  }
  __setup("nowatchdog", nowatchdog_setup);
  static int __init nosoftlockup_setup(char *str)
  {
  	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
  	return 1;
  }
  __setup("nosoftlockup", nosoftlockup_setup);

  #ifdef CONFIG_SMP
  static int __init softlockup_all_cpu_backtrace_setup(char *str)
  {
  	sysctl_softlockup_all_cpu_backtrace =
  		!!simple_strtol(str, NULL, 0);
  	return 1;
  }
  __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
  static int __init hardlockup_all_cpu_backtrace_setup(char *str)
  {
  	sysctl_hardlockup_all_cpu_backtrace =
  		!!simple_strtol(str, NULL, 0);
  	return 1;
  }
  __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
  #endif

  /*
   * Hard-lockup warnings should be triggered after just a few seconds. Soft-
   * lockups can have false positives under extreme conditions. So we generally
   * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the
 * hard threshold.
   */
  static int get_softlockup_thresh(void)
  {
  	return watchdog_thresh * 2;
  }
  
  /*
   * Returns seconds, approximately.  We don't need nanosecond
   * resolution, and we don't need to waste time with a big divide when
   * 2^30ns == 1.074s.
   */
  static unsigned long get_timestamp(void)
  {
  	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
  }
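
/*
 * Worked example of the shift above: running_clock() returns nanoseconds and
 * ">> 30" divides by 2^30 = 1073741824, so 60 * 10^9 ns comes out as 55
 * "seconds" - roughly 7% short, which is close enough for lockup detection.
 */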
  static void set_sample_period(void)
  {
  	/*
  	 * convert watchdog_thresh from seconds to ns
  	 * the divide by 5 is to give hrtimer several chances (two
  	 * or three with the current relation between the soft
  	 * and hard thresholds) to increment before the
  	 * hardlockup detector generates a warning
  	 */
  	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
  }
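
/*
 * With the default watchdog_thresh of 10, get_softlockup_thresh() yields a
 * 20 second soft lockup limit and the calculation above gives
 * sample_period = 20 * (10^9 / 5) ns, i.e. the hrtimer fires every 4 seconds.
 */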
  
  /* Commands for resetting the watchdog */
  static void __touch_watchdog(void)
  {
  	__this_cpu_write(watchdog_touch_ts, get_timestamp());
  }
  /**
   * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
   *
   * Call when the scheduler may have stalled for legitimate reasons
   * preventing the watchdog task from executing - e.g. the scheduler
   * entering idle state.  This should only be used for scheduler events.
   * Use touch_softlockup_watchdog() for everything else.
   */
  void touch_softlockup_watchdog_sched(void)
  {
  	/*
  	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
  	 * gets zeroed here, so use the raw_ operation.
  	 */
  	raw_cpu_write(watchdog_touch_ts, 0);
  }
  
  void touch_softlockup_watchdog(void)
  {
  	touch_softlockup_watchdog_sched();
  	wq_watchdog_touch(raw_smp_processor_id());
  }
  EXPORT_SYMBOL(touch_softlockup_watchdog);

  void touch_all_softlockup_watchdogs(void)
  {
  	int cpu;
  
	/*
	 * This is done locklessly. If a zero races with a real timestamp,
	 * it only means the softlockup check starts one cycle later.
	 */
  	for_each_watchdog_cpu(cpu)
  		per_cpu(watchdog_touch_ts, cpu) = 0;
  	wq_watchdog_touch(-1);
  }
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  void touch_nmi_watchdog(void)
  {
  	/*
  	 * Using __raw here because some code paths have
  	 * preemption enabled.  If preemption is enabled
  	 * then interrupts should be enabled too, in which
  	 * case we shouldn't have to worry about the watchdog
  	 * going off.
  	 */
  	raw_cpu_write(watchdog_nmi_touch, true);
  	touch_softlockup_watchdog();
  }
  EXPORT_SYMBOL(touch_nmi_watchdog);
  #endif
  void touch_softlockup_watchdog_sync(void)
  {
  	__this_cpu_write(softlockup_touch_sync, true);
  	__this_cpu_write(watchdog_touch_ts, 0);
  }
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  /* watchdog detector functions */
  static bool is_hardlockup(void)
  {
  	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

  	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
  		return true;

  	__this_cpu_write(hrtimer_interrupts_saved, hrint);
  	return false;
  }
  #endif
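
/*
 * In other words, a hard lockup is assumed when two consecutive perf NMIs on
 * a CPU see the same hrtimer_interrupts count, meaning watchdog_timer_fn()
 * never ran in between. A rough timeline (sketch):
 *
 *	NMI #1: hrtimer_interrupts == 42, saved as 42
 *	... CPU spins with interrupts disabled, the hrtimer cannot fire ...
 *	NMI #2: hrtimer_interrupts still 42  ->  is_hardlockup() returns true
 */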
  static int is_softlockup(unsigned long touch_ts)
  {
  	unsigned long now = get_timestamp();

  	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
  		/* Warn about unreasonable delays. */
  		if (time_after(now, touch_ts + get_softlockup_thresh()))
  			return now - touch_ts;
  	}
  	return 0;
  }
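
/*
 * Example of the arithmetic above with the default threshold: watchdog_thresh
 * of 10 gives a 20 second limit, so if the per-cpu timestamp was last touched
 * at t = 100s and 'now' is 125s, is_softlockup() returns 25 - the number of
 * seconds the watchdog thread has been kept off the CPU (a sketch).
 */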
  #ifdef CONFIG_HARDLOCKUP_DETECTOR

  static struct perf_event_attr wd_hw_attr = {
  	.type		= PERF_TYPE_HARDWARE,
  	.config		= PERF_COUNT_HW_CPU_CYCLES,
  	.size		= sizeof(struct perf_event_attr),
  	.pinned		= 1,
  	.disabled	= 1,
  };
  
  /* Callback function for perf event subsystem */
  static void watchdog_overflow_callback(struct perf_event *event,
  		 struct perf_sample_data *data,
  		 struct pt_regs *regs)
  {
  	/* Ensure the watchdog never gets throttled */
  	event->hw.interrupts = 0;
  	if (__this_cpu_read(watchdog_nmi_touch) == true) {
  		__this_cpu_write(watchdog_nmi_touch, false);
  		return;
  	}
  
	/* Check for a hardlockup.
	 * This is done by making sure our timer interrupt
	 * is incrementing.  The timer interrupt should have
	 * fired multiple times before we overflowed.  If it hasn't,
	 * then this is a good indication the cpu is stuck.
	 */
  	if (is_hardlockup()) {
  		int this_cpu = smp_processor_id();
  		struct pt_regs *regs = get_irq_regs();

  		/* only print hardlockups once */
  		if (__this_cpu_read(hard_watchdog_warn) == true)
  			return;
  		pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  
  		/*
  		 * Perform all-CPU dump only once to avoid multiple hardlockups
  		 * generating interleaving traces
  		 */
  		if (sysctl_hardlockup_all_cpu_backtrace &&
  				!test_and_set_bit(0, &hardlockup_allcpu_dumped))
  			trigger_allbutself_cpu_backtrace();
  
  		if (hardlockup_panic)
  			nmi_panic(regs, "Hard LOCKUP");

  		__this_cpu_write(hard_watchdog_warn, true);
  		return;
  	}
  	__this_cpu_write(hard_watchdog_warn, false);
  	return;
  }
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */
  static void watchdog_interrupt_count(void)
  {
  	__this_cpu_inc(hrtimer_interrupts);
  }
  
  static int watchdog_nmi_enable(unsigned int cpu);
  static void watchdog_nmi_disable(unsigned int cpu);

  static int watchdog_enable_all_cpus(void);
  static void watchdog_disable_all_cpus(void);
  /* watchdog kicker functions */
  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  {
  	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
  	struct pt_regs *regs = get_irq_regs();
  	int duration;
  	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
  
  	/* kick the hardlockup detector */
  	watchdog_interrupt_count();
  
  	/* kick the softlockup detector */
  	wake_up_process(__this_cpu_read(softlockup_watchdog));
  
  	/* .. and repeat */
  	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
  
  	if (touch_ts == 0) {
  		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
  			/*
  			 * If the time stamp was touched atomically
  			 * make sure the scheduler tick is up to date.
  			 */
  			__this_cpu_write(softlockup_touch_sync, false);
  			sched_clock_tick();
  		}
  
  		/* Clear the guest paused flag on watchdog reset */
  		kvm_check_and_clear_guest_paused();
  		__touch_watchdog();
  		return HRTIMER_RESTART;
  	}
  
	/* Check for a softlockup.
	 * This is done by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to
	 * indicate it is getting cpu time.  If it hasn't, then
	 * this is a good indication some task is hogging the cpu.
	 */
  	duration = is_softlockup(touch_ts);
  	if (unlikely(duration)) {
  		/*
  		 * If a virtual machine is stopped by the host it can look to
  		 * the watchdog like a soft lockup, check to see if the host
  		 * stopped the vm before we issue the warning
  		 */
  		if (kvm_check_and_clear_guest_paused())
  			return HRTIMER_RESTART;
  		/* only warn once */
  		if (__this_cpu_read(soft_watchdog_warn) == true) {
  			/*
  			 * When multiple processes are causing softlockups the
  			 * softlockup detector only warns on the first one
  			 * because the code relies on a full quiet cycle to
  			 * re-arm.  The second process prevents the quiet cycle
  			 * and never gets reported.  Use task pointers to detect
  			 * this.
  			 */
  			if (__this_cpu_read(softlockup_task_ptr_saved) !=
  			    current) {
  				__this_cpu_write(soft_watchdog_warn, false);
  				__touch_watchdog();
  			}
  			return HRTIMER_RESTART;
  		}

  		if (softlockup_all_cpu_backtrace) {
  			/* Prevent multiple soft-lockup reports if one cpu is already
  			 * engaged in dumping cpu back traces
  			 */
  			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
  				/* Someone else will report us. Let's give up */
  				__this_cpu_write(soft_watchdog_warn, true);
  				return HRTIMER_RESTART;
  			}
  		}
  		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]
  ",
  			smp_processor_id(), duration,
  			current->comm, task_pid_nr(current));
  		__this_cpu_write(softlockup_task_ptr_saved, current);
  		print_modules();
  		print_irqtrace_events(current);
  		if (regs)
  			show_regs(regs);
  		else
  			dump_stack();
  		if (softlockup_all_cpu_backtrace) {
  			/* Avoid generating two back traces for current
  			 * given that one is already made above
  			 */
  			trigger_allbutself_cpu_backtrace();
  
  			clear_bit(0, &soft_lockup_nmi_warn);
  			/* Barrier to sync with other cpus */
  			smp_mb__after_atomic();
  		}
  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
  		if (softlockup_panic)
  			panic("softlockup: hung tasks");
  		__this_cpu_write(soft_watchdog_warn, true);
  	} else
  		__this_cpu_write(soft_watchdog_warn, false);
  
  	return HRTIMER_RESTART;
  }
  static void watchdog_set_prio(unsigned int policy, unsigned int prio)
  {
  	struct sched_param param = { .sched_priority = prio };

  	sched_setscheduler(current, policy, &param);
  }
  
  static void watchdog_enable(unsigned int cpu)
  {
  	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

  	/* kick off the timer for the hardlockup detector */
  	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  	hrtimer->function = watchdog_timer_fn;
  	/* Enable the perf event */
  	watchdog_nmi_enable(cpu);

  	/* done here because hrtimer_start can only pin to smp_processor_id() */
  	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
  		      HRTIMER_MODE_REL_PINNED);
  	/* initialize timestamp */
  	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
  	__touch_watchdog();
  }

  static void watchdog_disable(unsigned int cpu)
  {
  	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

  	watchdog_set_prio(SCHED_NORMAL, 0);
  	hrtimer_cancel(hrtimer);
  	/* disable the perf event */
  	watchdog_nmi_disable(cpu);
  }
  static void watchdog_cleanup(unsigned int cpu, bool online)
  {
  	watchdog_disable(cpu);
  }
  static int watchdog_should_run(unsigned int cpu)
  {
  	return __this_cpu_read(hrtimer_interrupts) !=
  		__this_cpu_read(soft_lockup_hrtimer_cnt);
  }
  
  /*
   * The watchdog thread function - touches the timestamp.
   *
   * It only runs once every sample_period seconds (4 seconds by
   * default) to reset the softlockup timestamp. If this gets delayed
   * for more than 2*watchdog_thresh seconds then the debug-printout
   * triggers in watchdog_timer_fn().
   */
  static void watchdog(unsigned int cpu)
  {
  	__this_cpu_write(soft_lockup_hrtimer_cnt,
  			 __this_cpu_read(hrtimer_interrupts));
  	__touch_watchdog();
  
  	/*
  	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
  	 * failure path. Check for failures that can occur asynchronously -
  	 * for example, when CPUs are on-lined - and shut down the hardware
  	 * perf event on each CPU accordingly.
  	 *
  	 * The only non-obvious place this bit can be cleared is through
  	 * watchdog_nmi_enable(), so a pr_info() is placed there.  Placing a
  	 * pr_info here would be too noisy as it would result in a message
  	 * every few seconds if the hardlockup was disabled but the softlockup
  	 * enabled.
  	 */
  	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
  		watchdog_nmi_disable(cpu);
  }

  #ifdef CONFIG_HARDLOCKUP_DETECTOR
  /*
   * People like the simple clean cpu node info on boot.
   * Reduce the watchdog noise by only printing messages
   * that are different from what cpu0 displayed.
   */
  static unsigned long cpu0_err;
  static int watchdog_nmi_enable(unsigned int cpu)
  {
  	struct perf_event_attr *wd_attr;
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  	/* nothing to do if the hard lockup detector is disabled */
  	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
  		goto out;

  	/* is it already setup and enabled? */
  	if (event && event->state > PERF_EVENT_STATE_OFF)
  		goto out;
  
  	/* it is setup but not enabled */
  	if (event != NULL)
  		goto out_enable;
  	wd_attr = &wd_hw_attr;
  	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
  
  	/* Try to register using hardware perf events */
  	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
  
	/* save cpu0 error for future comparison */
  	if (cpu == 0 && IS_ERR(event))
  		cpu0_err = PTR_ERR(event);
  	if (!IS_ERR(event)) {
  		/* only print for cpu0 or different than cpu0 */
  		if (cpu == 0 || cpu0_err)
  			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.
  ");
  		goto out_save;
  	}
  	/*
	 * Disable the hard lockup detector if _any_ CPU fails to set up
	 * the hardware perf event. The watchdog() function checks
  	 * the NMI_WATCHDOG_ENABLED bit periodically.
  	 *
  	 * The barriers are for syncing up watchdog_enabled across all the
  	 * cpus, as clear_bit() does not use barriers.
  	 */
  	smp_mb__before_atomic();
  	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
  	smp_mb__after_atomic();
  	/* skip displaying the same error again */
  	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
  		return PTR_ERR(event);
  
  	/* vary the KERN level based on the returned errno */
  	if (PTR_ERR(event) == -EOPNOTSUPP)
  		pr_info("disabled (cpu%i): not supported (no LAPIC?)
  ", cpu);
  	else if (PTR_ERR(event) == -ENOENT)
  		pr_warn("disabled (cpu%i): hardware events not enabled
  ",
  			 cpu);
  	else
  		pr_err("disabled (cpu%i): unable to create perf event: %ld
  ",
  			cpu, PTR_ERR(event));
  
  	pr_info("Shutting down hard lockup detector on all cpus
  ");
  	return PTR_ERR(event);
  
  	/* success path */
  out_save:
  	per_cpu(watchdog_ev, cpu) = event;
  out_enable:
  	perf_event_enable(per_cpu(watchdog_ev, cpu));
  out:
  	return 0;
  }
  static void watchdog_nmi_disable(unsigned int cpu)
  {
  	struct perf_event *event = per_cpu(watchdog_ev, cpu);
  
  	if (event) {
  		perf_event_disable(event);
  		per_cpu(watchdog_ev, cpu) = NULL;
  
  		/* should be in cleanup, but blocks oprofile */
  		perf_event_release_kernel(event);
  	}
  	if (cpu == 0) {
  		/* watchdog_nmi_enable() expects this to be zero initially. */
  		cpu0_err = 0;
  	}
  }

  #else
  static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
  static void watchdog_nmi_disable(unsigned int cpu) { return; }
  #endif /* CONFIG_HARDLOCKUP_DETECTOR */

  static struct smp_hotplug_thread watchdog_threads = {
  	.store			= &softlockup_watchdog,
  	.thread_should_run	= watchdog_should_run,
  	.thread_fn		= watchdog,
  	.thread_comm		= "watchdog/%u",
  	.setup			= watchdog_enable,
  	.cleanup		= watchdog_cleanup,
  	.park			= watchdog_disable,
  	.unpark			= watchdog_enable,
  };
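
/*
 * Life cycle of the per-cpu threads described above (a sketch of how the
 * smpboot callbacks are driven; registration happens in
 * watchdog_enable_all_cpus() below):
 *
 *	smpboot_register_percpu_thread_cpumask(&watchdog_threads,
 *					       &watchdog_cpumask);
 *		-> .setup (watchdog_enable) on each unparked online cpu,
 *		   then the .thread_should_run/.thread_fn loop
 *	kthread_park(per_cpu(softlockup_watchdog, cpu))
 *		-> .park (watchdog_disable)
 *	kthread_unpark(per_cpu(softlockup_watchdog, cpu))
 *		-> .unpark (watchdog_enable)
 */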
  /*
   * park all watchdog threads that are specified in 'watchdog_cpumask'
   *
   * This function returns an error if kthread_park() of a watchdog thread
   * fails. In this situation, the watchdog threads of some CPUs can already
   * be parked and the watchdog threads of other CPUs can still be runnable.
   * Callers are expected to handle this special condition as appropriate in
   * their context.
   *
   * This function may only be called in a context that is protected against
   * races with CPU hotplug - for example, via get_online_cpus().
   */
  static int watchdog_park_threads(void)
  {
  	int cpu, ret = 0;
  	for_each_watchdog_cpu(cpu) {
  		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
  		if (ret)
  			break;
  	}
  
  	return ret;
  }
  
  /*
   * unpark all watchdog threads that are specified in 'watchdog_cpumask'
   *
   * This function may only be called in a context that is protected against
   * races with CPU hotplug - for example, via get_online_cpus().
   */
  static void watchdog_unpark_threads(void)
  {
  	int cpu;
  	for_each_watchdog_cpu(cpu)
  		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
  }
  /*
   * Suspend the hard and soft lockup detector by parking the watchdog threads.
   */
  int lockup_detector_suspend(void)
  {
  	int ret = 0;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  	/*
  	 * Multiple suspend requests can be active in parallel (counted by
  	 * the 'watchdog_suspended' variable). If the watchdog threads are
  	 * running, the first caller takes care that they will be parked.
  	 * The state of 'watchdog_running' cannot change while a suspend
  	 * request is active (see related code in 'proc' handlers).
  	 */
  	if (watchdog_running && !watchdog_suspended)
  		ret = watchdog_park_threads();
  
  	if (ret == 0)
  		watchdog_suspended++;
  	else {
  		watchdog_disable_all_cpus();
  		pr_err("Failed to suspend lockup detectors, disabled
  ");
  		watchdog_enabled = 0;
  	}
  
  	mutex_unlock(&watchdog_proc_mutex);
  
  	return ret;
  }
  
  /*
   * Resume the hard and soft lockup detector by unparking the watchdog threads.
   */
  void lockup_detector_resume(void)
  {
  	mutex_lock(&watchdog_proc_mutex);
  
  	watchdog_suspended--;
  	/*
  	 * The watchdog threads are unparked if they were previously running
  	 * and if there is no more active suspend request.
  	 */
  	if (watchdog_running && !watchdog_suspended)
  		watchdog_unpark_threads();
  
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  }
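
/*
 * A minimal usage sketch for the suspend/resume pair above, for a subsystem
 * that must keep the detectors quiet across a long operation:
 *
 *	if (!lockup_detector_suspend()) {
 *		do_long_uninterruptible_work();	(placeholder for caller code)
 *		lockup_detector_resume();
 *	}
 */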
  static int update_watchdog_all_cpus(void)
  {
  	int ret;
  
  	ret = watchdog_park_threads();
  	if (ret)
  		return ret;
  	watchdog_unpark_threads();
  
  	return 0;
  }
  static int watchdog_enable_all_cpus(void)
  {
  	int err = 0;

  	if (!watchdog_running) {
  		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
  							     &watchdog_cpumask);
  		if (err)
  			pr_err("Failed to create watchdog threads, disabled
  ");
  		else
  			watchdog_running = 1;
  	} else {
  		/*
  		 * Enable/disable the lockup detectors or
  		 * change the sample period 'on the fly'.
  		 */
  		err = update_watchdog_all_cpus();
  
  		if (err) {
  			watchdog_disable_all_cpus();
  			pr_err("Failed to update lockup detectors, disabled
  ");
  		}
  	}

  	if (err)
  		watchdog_enabled = 0;
  	return err;
  }
  
  static void watchdog_disable_all_cpus(void)
  {
  	if (watchdog_running) {
  		watchdog_running = 0;
  		smpboot_unregister_percpu_thread(&watchdog_threads);
  	}
  }
  #ifdef CONFIG_SYSCTL
  /*
   * Update the run state of the lockup detectors.
   */
  static int proc_watchdog_update(void)
  {
  	int err = 0;
  
  	/*
  	 * Watchdog threads won't be started if they are already active.
  	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
  	 * care of this. If those threads are already active, the sample
  	 * period will be updated and the lockup detectors will be enabled
  	 * or disabled 'on the fly'.
  	 */
  	if (watchdog_enabled && watchdog_thresh)
  		err = watchdog_enable_all_cpus();
  	else
  		watchdog_disable_all_cpus();
  
  	return err;
  
  }
  
  /*
   * common function for watchdog, nmi_watchdog and soft_watchdog parameter
   *
   * caller             | table->data points to | 'which' contains the flag(s)
   * -------------------|-----------------------|-----------------------------
   * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
   *                    |                       | with SOFT_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
   * -------------------|-----------------------|-----------------------------
   * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
   */
  static int proc_watchdog_common(int which, struct ctl_table *table, int write,
  				void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old, new;
  	int *watchdog_param = (int *)table->data;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	/*
  	 * If the parameter is being read return the state of the corresponding
  	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
  	 * run state of the lockup detectors.
  	 */
  	if (!write) {
  		*watchdog_param = (watchdog_enabled & which) != 0;
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  	} else {
  		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  		if (err)
  			goto out;
  
  		/*
  		 * There is a race window between fetching the current value
  		 * from 'watchdog_enabled' and storing the new value. During
  		 * this race window, watchdog_nmi_enable() can sneak in and
  		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
  		 * The 'cmpxchg' detects this race and the loop retries.
  		 */
  		do {
  			old = watchdog_enabled;
  			/*
  			 * If the parameter value is not zero set the
  			 * corresponding bit(s), else clear it(them).
  			 */
  			if (*watchdog_param)
  				new = old | which;
  			else
  				new = old & ~which;
  		} while (cmpxchg(&watchdog_enabled, old, new) != old);
  
  		/*
  		 * Update the run state of the lockup detectors. There is _no_
  		 * need to check the value returned by proc_watchdog_update()
  		 * and to restore the previous value of 'watchdog_enabled' as
  		 * both lockup detectors are disabled if proc_watchdog_update()
  		 * returns an error.
  		 */
  		if (old == new)
  			goto out;
  		err = proc_watchdog_update();
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
  
  /*
   * /proc/sys/kernel/watchdog
   */
  int proc_watchdog(struct ctl_table *table, int write,
  		  void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/nmi_watchdog
   */
  int proc_nmi_watchdog(struct ctl_table *table, int write,
  		      void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
  
  /*
   * /proc/sys/kernel/soft_watchdog
   */
  int proc_soft_watchdog(struct ctl_table *table, int write,
  			void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
  				    table, write, buffer, lenp, ppos);
  }
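
/*
 * Seen from user space, the three handlers above behave like this (a sketch;
 * the range checking on the written value comes from the ctl_table set up in
 * kernel/sysctl.c):
 *
 *	echo 0 > /proc/sys/kernel/nmi_watchdog     clears NMI_WATCHDOG_ENABLED
 *	echo 1 > /proc/sys/kernel/soft_watchdog    sets SOFT_WATCHDOG_ENABLED
 *	echo 0 > /proc/sys/kernel/watchdog         clears both bits
 */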

  /*
   * /proc/sys/kernel/watchdog_thresh
   */
  int proc_watchdog_thresh(struct ctl_table *table, int write,
  			 void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err, old, new;

  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);

  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	old = ACCESS_ONCE(watchdog_thresh);
  	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

  	if (err || !write)
  		goto out;

  	/*
  	 * Update the sample period. Restore on failure.
  	 */
  	new = ACCESS_ONCE(watchdog_thresh);
  	if (old == new)
  		goto out;
  	set_sample_period();
  	err = proc_watchdog_update();
  	if (err) {
  		watchdog_thresh = old;
  		set_sample_period();
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
  
  /*
   * The cpumask is the mask of possible cpus that the watchdog can run
   * on, not the mask of cpus it is actually running on.  This allows the
   * user to specify a mask that will include cpus that have not yet
   * been brought online, if desired.
   */
  int proc_watchdog_cpumask(struct ctl_table *table, int write,
  			  void __user *buffer, size_t *lenp, loff_t *ppos)
  {
  	int err;
  	get_online_cpus();
  	mutex_lock(&watchdog_proc_mutex);
  
  	if (watchdog_suspended) {
  		/* no parameter changes allowed while watchdog is suspended */
  		err = -EAGAIN;
  		goto out;
  	}
  	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
  	if (!err && write) {
  		/* Remove impossible cpus to keep sysctl output cleaner. */
  		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
  			    cpu_possible_mask);
  
  		if (watchdog_running) {
  			/*
  			 * Failure would be due to being unable to allocate
  			 * a temporary cpumask, so we are likely not in a
  			 * position to do much else to make things better.
  			 */
  			if (smpboot_update_cpumask_percpu_thread(
  				    &watchdog_threads, &watchdog_cpumask) != 0)
  				pr_err("cpumask update failed
  ");
  		}
  	}
  out:
  	mutex_unlock(&watchdog_proc_mutex);
  	put_online_cpus();
  	return err;
  }
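
/*
 * Example for the cpumask interface above (a sketch; the value is parsed by
 * proc_do_large_bitmap(), which accepts comma separated ranges):
 *
 *	echo 0-3,8 > /proc/sys/kernel/watchdog_cpumask
 *
 * restricts the watchdog threads to CPUs 0-3 and 8; impossible CPUs are
 * masked out by the cpumask_and() above.
 */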
  #endif /* CONFIG_SYSCTL */
  void __init lockup_detector_init(void)
  {
  	set_sample_period();

  #ifdef CONFIG_NO_HZ_FULL
  	if (tick_nohz_full_enabled()) {
  		pr_info("Disabling watchdog on nohz_full cores by default
  ");
  		cpumask_copy(&watchdog_cpumask, housekeeping_mask);
  	} else
  		cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
  #else
  	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
  #endif
  	if (watchdog_enabled)
  		watchdog_enable_all_cpus();
  }