Blame view
kernel/watchdog.c
18.9 KB
58687acba lockup_detector: ... |
1 2 3 4 5 |
/* * Detect hard and soft lockups on a system * * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. * |
86f5e6a7b watchdog: Fix cod... |
6 7 8 |
* Note: Most of this code is borrowed heavily from the original softlockup * detector, so thanks to Ingo for the initial implementation. * Some chunks also taken from the old x86-specific nmi watchdog code, thanks |
58687acba lockup_detector: ... |
9 10 |
* to those contributors as well. */ |
4501980aa kernel/watchdog.c... |
11 |
#define pr_fmt(fmt) "NMI watchdog: " fmt |
58687acba lockup_detector: ... |
12 13 14 15 |
#include <linux/mm.h> #include <linux/cpu.h> #include <linux/nmi.h> #include <linux/init.h> |
58687acba lockup_detector: ... |
16 17 |
#include <linux/module.h> #include <linux/sysctl.h> |
bcd951cf1 watchdog: Use hot... |
18 |
#include <linux/smpboot.h> |
8bd75c77b sched/rt: Move rt... |
19 |
#include <linux/sched/rt.h> |
58687acba lockup_detector: ... |
20 21 |
#include <asm/irq_regs.h> |
5d1c0f4a8 watchdog: add che... |
22 |
#include <linux/kvm_para.h> |
58687acba lockup_detector: ... |
23 |
#include <linux/perf_event.h> |
3c00ea82c watchdog: Rename ... |
24 |
/* Set to 0 via boot params ("nowatchdog"/"nmi_watchdog=0") or sysctl. */
int watchdog_user_enabled = 1;
/* Hard lockup threshold in seconds; soft threshold is twice this. */
int __read_mostly watchdog_thresh = 10;

#ifdef CONFIG_SMP
/* sysctl: dump backtraces of all CPUs on a soft lockup (SMP only). */
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#else
#define sysctl_softlockup_all_cpu_backtrace 0
#endif

/* Nonzero once the per-cpu watchdog threads have been registered. */
static int __read_mostly watchdog_running;
/* hrtimer period in ns, derived from watchdog_thresh in set_sample_period(). */
static u64 __read_mostly sample_period;

/* Per-cpu last-touch timestamp (seconds); 0 means "just touched". */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
#ifdef CONFIG_HARDLOCKUP_DETECTOR
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
/* Bit 0 is set while one CPU is dumping all-cpu backtraces (see timer fn). */
static unsigned long soft_lockup_nmi_warn;
58687acba lockup_detector: ... |
49 |
|
58687acba lockup_detector: ... |
50 51 52 53 |
/* boot commands */
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* If set, a detected hard lockup panics; boot default comes from Kconfig. */
static int hardlockup_panic =
			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;

static bool hardlockup_detector_enabled = true;
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void watchdog_enable_hardlockup_detector(bool val)
{
	hardlockup_detector_enabled = val;
}

/* Report whether hard lockup detection is currently enabled. */
bool watchdog_hardlockup_detector_is_enabled(void)
{
	return hardlockup_detector_enabled;
}
58687acba lockup_detector: ... |
76 77 78 79 |
/* Parse "nmi_watchdog=" (panic/nopanic/0/1/2); always returns 1 (consumed). */
static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		watchdog_user_enabled = 0;
	else if (!strncmp(str, "1", 1) || !strncmp(str, "2", 1)) {
		/*
		 * Setting 'nmi_watchdog=1' or 'nmi_watchdog=2' (legacy option)
		 * has the same effect.
		 */
		watchdog_user_enabled = 1;
		watchdog_enable_hardlockup_detector(true);
	}
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif

/* If nonzero, a detected soft lockup panics; default from Kconfig. */
unsigned int __read_mostly softlockup_panic =
			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

/* Parse "softlockup_panic=" (any numeric value). */
static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);
	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

/* "nowatchdog": disable both soft and hard lockup detectors. */
static int __init nowatchdog_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
	/*
	 * NOTE(review): this clears the shared enable flag, so it disables
	 * the hard lockup detector too — historical behavior, kept as-is.
	 */
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
/*  */
ed235875e kernel/watchdog.c... |
123 124 125 126 127 128 129 130 131 |
#ifdef CONFIG_SMP
/* Parse "softlockup_all_cpu_backtrace=": any nonzero value enables it. */
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{
	sysctl_softlockup_all_cpu_backtrace =
		!!simple_strtol(str, NULL, 0);
	return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
#endif
58687acba lockup_detector: ... |
132 |
|
4eec42f39 watchdog: Change ... |
133 134 135 136 137 138 139 |
/* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally * want a higher threshold for soft lockups than for hard lockups. So we couple * the thresholds with a factor: we make the soft threshold twice the amount of * time the hard threshold is. */ |
6e9101aee watchdog: Fix non... |
140 |
static int get_softlockup_thresh(void) |
4eec42f39 watchdog: Change ... |
141 142 143 |
{ return watchdog_thresh * 2; } |
58687acba lockup_detector: ... |
144 145 146 147 148 149 |
/*
 * Returns seconds, approximately. We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	/* Shift by 30 instead of dividing by NSEC_PER_SEC: 2^30 ~= 10^9. */
	return (unsigned long)(local_clock() >> 30);
}
0f34c4009 watchdog: store t... |
154 |
/* Derive the hrtimer sample period (ns) from the current thresholds. */
static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
}

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	/* Record "now" so the softlockup check restarts its window. */
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
}
332fbdbca lockup_detector: ... |
171 |
/*
 * Reset the softlockup window for the current CPU by writing the special
 * 0 timestamp, which watchdog_timer_fn() treats as "just touched".
 */
void touch_softlockup_watchdog(void)
{
	/*
	 * Preemption can be enabled. It doesn't matter which CPU's timestamp
	 * gets zeroed here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_touch_ts, 0);
}
EXPORT_SYMBOL(touch_softlockup_watchdog);
58687acba lockup_detector: ... |
180 |
|
332fbdbca lockup_detector: ... |
181 |
/* Zero every online CPU's touch timestamp (lockless; racing is harmless). */
void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * this is done lockless
	 * do we care if a 0 races with a timestamp?
	 * all it means is the softlock check starts one cycle later
	 */
	for_each_online_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
}
cafcd80d2 lockup_detector: ... |
193 |
#ifdef CONFIG_HARDLOCKUP_DETECTOR |
58687acba lockup_detector: ... |
194 195 |
void touch_nmi_watchdog(void) { |
62572e29b kernel/watchdog.c... |
196 197 198 199 200 201 202 |
/* * Using __raw here because some code paths have * preemption enabled. If preemption is enabled * then interrupts should be enabled too, in which * case we shouldn't have to worry about the watchdog * going off. */ |
f7f66b05a watchdog: Replace... |
203 |
raw_cpu_write(watchdog_nmi_touch, true); |
332fbdbca lockup_detector: ... |
204 |
touch_softlockup_watchdog(); |
58687acba lockup_detector: ... |
205 206 |
} EXPORT_SYMBOL(touch_nmi_watchdog); |
cafcd80d2 lockup_detector: ... |
207 |
#endif |
58687acba lockup_detector: ... |
208 209 |
/*
 * Touch the softlockup watchdog and ask the timer callback to resync the
 * scheduler clock (sched_clock_tick) before re-arming the timestamp.
 */
void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_touch_ts, 0);
}
23637d477 lockup_detector: ... |
213 |
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
/*
 * A hard lockup is flagged when the hrtimer interrupt count on this cpu
 * has not advanced since the last NMI sample. The saved count is only
 * refreshed while the cpu is healthy.
 */
static int is_hardlockup(void)
{
	unsigned long curr = __this_cpu_read(hrtimer_interrupts);
	int stuck = (__this_cpu_read(hrtimer_interrupts_saved) == curr);

	if (!stuck)
		__this_cpu_write(hrtimer_interrupts_saved, curr);

	return stuck;
}
#endif
26e09c6ee lockup_detector: ... |
225 |
/*
 * Return how many seconds the cpu has been stuck past the soft threshold,
 * or 0 if the touch timestamp is still within the allowed window.
 */
static int is_softlockup(unsigned long touch_ts)
{
	unsigned long now = get_timestamp();
	unsigned long deadline = touch_ts + get_softlockup_thresh();

	/* Warn about unreasonable delays: */
	return time_after(now, deadline) ? now - touch_ts : 0;
}
23637d477 lockup_detector: ... |
235 |
#ifdef CONFIG_HARDLOCKUP_DETECTOR

/* Perf attr for the NMI watchdog: pinned, initially-disabled cycle counter. */
static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};

/* Callback function for perf event subsystem */
a8b0ca17b perf: Remove the ... |
246 |
/*
 * Runs in NMI context on perf counter overflow: decides whether this cpu
 * is hard-locked (hrtimer interrupts stopped advancing) and warns/panics.
 */
static void watchdog_overflow_callback(struct perf_event *event,
		 struct perf_sample_data *data,
		 struct pt_regs *regs)
{
	/* Ensure the watchdog never gets throttled */
	event->hw.interrupts = 0;

	/* A recent touch_nmi_watchdog() suppresses this one sample. */
	if (__this_cpu_read(watchdog_nmi_touch) == true) {
		__this_cpu_write(watchdog_nmi_touch, false);
		return;
	}

	/* check for a hardlockup
	 * This is done by making sure our timer interrupt
	 * is incrementing. The timer interrupt should have
	 * fired multiple times before we overflow'd. If it hasn't
	 * then this is a good indication the cpu is stuck
	 */
	if (is_hardlockup()) {
		int this_cpu = smp_processor_id();

		/* only print hardlockups once */
		if (__this_cpu_read(hard_watchdog_warn) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %d",
			      this_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %d",
			     this_cpu);

		__this_cpu_write(hard_watchdog_warn, true);
		return;
	}

	/* Healthy again: re-arm the once-only warning. */
	__this_cpu_write(hard_watchdog_warn, false);
	return;
}
bcd951cf1 watchdog: Use hot... |
282 |
#endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
58687acba lockup_detector: ... |
283 284 |
/* Count one hrtimer tick on this cpu; read by the NMI hard lockup check. */
static void watchdog_interrupt_count(void)
{
	__this_cpu_inc(hrtimer_interrupts);
}
bcd951cf1 watchdog: Use hot... |
287 288 289 |
static int watchdog_nmi_enable(unsigned int cpu); static void watchdog_nmi_disable(unsigned int cpu); |
58687acba lockup_detector: ... |
290 291 292 293 |
/* watchdog kicker functions */
/*
 * hrtimer callback, fired every sample_period: kicks the hard lockup
 * counter, wakes the per-cpu watchdog thread, and performs the soft
 * lockup check/report for this cpu. Always re-arms itself.
 */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__this_cpu_read(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	/* touch_ts == 0 means someone asked for a fresh window. */
	if (touch_ts == 0) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		/* Clear the guest paused flag on watchdog reset */
		kvm_check_and_clear_guest_paused();
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/* check for a softlockup
	 * This is done by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to
	 * indicate it is getting cpu time.  If it hasn't then
	 * this is a good indication some task is hogging the cpu
	 */
	duration = is_softlockup(touch_ts);
	if (unlikely(duration)) {
		/*
		 * If a virtual machine is stopped by the host it can look to
		 * the watchdog like a soft lockup, check to see if the host
		 * stopped the vm before we issue the warning
		 */
		if (kvm_check_and_clear_guest_paused())
			return HRTIMER_RESTART;

		/* only warn once */
		if (__this_cpu_read(soft_watchdog_warn) == true) {
			/*
			 * When multiple processes are causing softlockups the
			 * softlockup detector only warns on the first one
			 * because the code relies on a full quiet cycle to
			 * re-arm.  The second process prevents the quiet cycle
			 * and never gets reported.  Use task pointers to detect
			 * this.
			 */
			if (__this_cpu_read(softlockup_task_ptr_saved) !=
			    current) {
				__this_cpu_write(soft_watchdog_warn, false);
				__touch_watchdog();
			}
			return HRTIMER_RESTART;
		}

		if (softlockup_all_cpu_backtrace) {
			/* Prevent multiple soft-lockup reports if one cpu is already
			 * engaged in dumping cpu back traces
			 */
			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
				/* Someone else will report us. Let's give up */
				__this_cpu_write(soft_watchdog_warn, true);
				return HRTIMER_RESTART;
			}
		}

		/* NOTE(review): trailing \n restored; lost in extraction. */
		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		__this_cpu_write(softlockup_task_ptr_saved, current);
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			/* Avoid generating two back traces for current
			 * given that one is already made above
			 */
			trigger_allbutself_cpu_backtrace();

			clear_bit(0, &soft_lockup_nmi_warn);
			/* Barrier to sync with other cpus */
			smp_mb__after_atomic();
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__this_cpu_write(soft_watchdog_warn, true);
	} else
		__this_cpu_write(soft_watchdog_warn, false);

	return HRTIMER_RESTART;
}
bcd951cf1 watchdog: Use hot... |
397 398 399 |
/* Set the calling thread's scheduling policy and priority. */
static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{
	struct sched_param param = { .sched_priority = prio };

	sched_setscheduler(current, policy, &param);
}

/* smpboot setup/unpark hook: arm this cpu's hrtimer, perf event and prio. */
static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	/* kick off the timer for the hardlockup detector */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	/* Enable the perf event */
	watchdog_nmi_enable(cpu);

	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);

	/* initialize timestamp */
	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
	__touch_watchdog();
}
58687acba lockup_detector: ... |
421 |
|
bcd951cf1 watchdog: Use hot... |
422 423 |
/* smpboot park hook: drop to SCHED_NORMAL, stop the hrtimer and perf event. */
static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);

	watchdog_set_prio(SCHED_NORMAL, 0);
	hrtimer_cancel(hrtimer);
	/* disable the perf event */
	watchdog_nmi_disable(cpu);
}
b8900bc02 watchdog: Registe... |
431 432 433 434 |
/* smpboot cleanup hook: same as parking — tear down this cpu's watchdog. */
static void watchdog_cleanup(unsigned int cpu, bool online)
{
	watchdog_disable(cpu);
}
bcd951cf1 watchdog: Use hot... |
435 436 437 438 439 440 441 442 443 |
/* Run the thread only when the hrtimer has ticked since the last run. */
static int watchdog_should_run(unsigned int cpu)
{
	return __this_cpu_read(hrtimer_interrupts) !=
		__this_cpu_read(soft_lockup_hrtimer_cnt);
}

/*
 * The watchdog thread function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp. If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static void watchdog(unsigned int cpu)
{
	__this_cpu_write(soft_lockup_hrtimer_cnt,
			 __this_cpu_read(hrtimer_interrupts));
	__touch_watchdog();
}
58687acba lockup_detector: ... |
455 |
|
23637d477 lockup_detector: ... |
456 |
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/*
 * People like the simple clean cpu node info on boot.
 * Reduce the watchdog noise by only printing messages
 * that are different from what cpu0 displayed.
 */
static unsigned long cpu0_err;

/*
 * Create (or re-enable) the per-cpu perf cycle counter that drives the
 * hard lockup NMI. Returns 0 on success or a negative errno.
 */
static int watchdog_nmi_enable(unsigned int cpu)
{
	struct perf_event_attr *wd_attr;
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	/*
	 * Some kernels need to default hard lockup detection to
	 * 'disabled', for example a guest on a hypervisor.
	 */
	if (!watchdog_hardlockup_detector_is_enabled()) {
		event = ERR_PTR(-ENOENT);
		goto handle_err;
	}

	/* is it already setup and enabled? */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		goto out;

	/* it is setup but not enabled */
	if (event != NULL)
		goto out_enable;

	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

	/* Try to register using hardware perf events */
	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
						 watchdog_overflow_callback, NULL);

handle_err:
	/* save cpu0 error for future comparision */
	if (cpu == 0 && IS_ERR(event))
		cpu0_err = PTR_ERR(event);

	if (!IS_ERR(event)) {
		/* only print for cpu0 or different than cpu0 */
		if (cpu == 0 || cpu0_err)
			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
		goto out_save;
	}

	/* skip displaying the same error again */
	if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
		return PTR_ERR(event);

	/* vary the KERN level based on the returned errno */
	if (PTR_ERR(event) == -EOPNOTSUPP)
		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
	else if (PTR_ERR(event) == -ENOENT)
		pr_warn("disabled (cpu%i): hardware events not enabled\n",
			cpu);
	else
		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
		       cpu, PTR_ERR(event));
	return PTR_ERR(event);

	/* success path */
out_save:
	per_cpu(watchdog_ev, cpu) = event;
out_enable:
	perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
	return 0;
}
bcd951cf1 watchdog: Use hot... |
525 |
/* Disable and release this cpu's hard lockup perf event, if present. */
static void watchdog_nmi_disable(unsigned int cpu)
{
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	if (event) {
		perf_event_disable(event);
		per_cpu(watchdog_ev, cpu) = NULL;

		/* should be in cleanup, but blocks oprofile */
		perf_event_release_kernel(event);
	}
	if (cpu == 0) {
		/* watchdog_nmi_enable() expects this to be zero initially. */
		cpu0_err = 0;
	}
}
#else
/* No hard lockup detector configured: NMI enable/disable are no-ops. */
static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
static void watchdog_nmi_disable(unsigned int cpu) { return; }
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
58687acba lockup_detector: ... |
545 |
|
b8900bc02 watchdog: Registe... |
546 547 548 549 550 551 552 553 554 555 |
/* Descriptor for the per-cpu "watchdog/%u" threads (smpboot-managed). */
static struct smp_hotplug_thread watchdog_threads = {
	.store			= &softlockup_watchdog,
	.thread_should_run	= watchdog_should_run,
	.thread_fn		= watchdog,
	.thread_comm		= "watchdog/%u",
	.setup			= watchdog_enable,
	.cleanup		= watchdog_cleanup,
	.park			= watchdog_disable,
	.unpark			= watchdog_enable,
};
9809b18fc watchdog: update ... |
556 557 |
/* IPI callback: re-arm this cpu's watchdog hrtimer with the new period. */
static void restart_watchdog_hrtimer(void *info)
{
	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
	int ret;

	/*
	 * No need to cancel and restart hrtimer if it is currently executing
	 * because it will reprogram itself with the new period now.
	 * We should never see it unqueued here because we are running per-cpu
	 * with interrupts disabled.
	 */
	ret = hrtimer_try_to_cancel(hrtimer);
	if (ret == 1)
		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
				HRTIMER_MODE_REL_PINNED);
}

/* Propagate the current sample_period to one cpu's perf event and hrtimer. */
static void update_timers(int cpu)
{
	/*
	 * Make sure that perf event counter will adopt to a new
	 * sampling period. Updating the sampling period directly would
	 * be much nicer but we do not have an API for that now so
	 * let's use a big hammer.
	 * Hrtimer will adopt the new period on the next tick but this
	 * might be late already so we have to restart the timer as well.
	 */
	watchdog_nmi_disable(cpu);
	smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1);
	watchdog_nmi_enable(cpu);
}

/* Apply the new sample period on every online cpu (hotplug-safe). */
static void update_timers_all_cpus(void)
{
	int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu)
		update_timers(cpu);
	put_online_cpus();
}

/*
 * Register the per-cpu watchdog threads on first call; on later calls,
 * push a changed sample period to all cpus. Returns 0 or negative errno.
 */
static int watchdog_enable_all_cpus(bool sample_period_changed)
{
	int err = 0;

	if (!watchdog_running) {
		err = smpboot_register_percpu_thread(&watchdog_threads);
		if (err)
			pr_err("Failed to create watchdog threads, disabled\n");
		else
			watchdog_running = 1;
	} else if (sample_period_changed) {
		update_timers_all_cpus();
	}

	return err;
}
b8900bc02 watchdog: Registe... |
615 616 617 |
/* prepare/enable/disable routines */
/* sysctl functions */
#ifdef CONFIG_SYSCTL
/* Unregister the per-cpu watchdog threads if they are currently running. */
static void watchdog_disable_all_cpus(void)
{
	if (watchdog_running) {
		watchdog_running = 0;
		smpboot_unregister_percpu_thread(&watchdog_threads);
	}
}
58687acba lockup_detector: ... |
625 |
/*
 * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
 */
int proc_dowatchdog(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old_thresh, old_enabled;
	bool old_hardlockup;
	/* Serializes concurrent writers to the watchdog sysctls. */
	static DEFINE_MUTEX(watchdog_proc_mutex);

	mutex_lock(&watchdog_proc_mutex);
	/* Snapshot current settings so a failed enable can be rolled back. */
	old_thresh = ACCESS_ONCE(watchdog_thresh);
	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
	old_hardlockup = watchdog_hardlockup_detector_is_enabled();

	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (err || !write)
		goto out;

	set_sample_period();
	/*
	 * Watchdog threads shouldn't be enabled if they are
	 * disabled. The 'watchdog_running' variable check in
	 * watchdog_*_all_cpus() function takes care of this.
	 */
	if (watchdog_user_enabled && watchdog_thresh) {
		/*
		 * Prevent a change in watchdog_thresh accidentally overriding
		 * the enablement of the hardlockup detector.
		 */
		if (watchdog_user_enabled != old_enabled)
			watchdog_enable_hardlockup_detector(true);
		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
	} else
		watchdog_disable_all_cpus();

	/* Restore old values on failure */
	if (err) {
		watchdog_thresh = old_thresh;
		watchdog_user_enabled = old_enabled;
		watchdog_enable_hardlockup_detector(old_hardlockup);
	}
out:
	mutex_unlock(&watchdog_proc_mutex);
	return err;
}
58687acba lockup_detector: ... |
670 |
#endif /* CONFIG_SYSCTL */ |
004417a6d perf, arch: Clean... |
671 |
/* Boot-time init: compute the sample period, then start the watchdogs. */
void __init lockup_detector_init(void)
{
	set_sample_period();

	/* Respects "nowatchdog"/"nmi_watchdog=0" parsed earlier. */
	if (watchdog_user_enabled)
		watchdog_enable_all_cpus(false);
}