Blame view

arch/x86/kernel/nmi.c 13.9 KB
1d48922c1   Don Zickus   x86, nmi: Split o...
1
2
3
  /*
   *  Copyright (C) 1991, 1992  Linus Torvalds
   *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
9c48f1c62   Don Zickus   x86, nmi: Wire up...
4
   *  Copyright (C) 2011	Don Zickus Red Hat, Inc.
1d48922c1   Don Zickus   x86, nmi: Split o...
5
6
7
8
9
10
11
12
13
14
15
16
   *
   *  Pentium III FXSR, SSE support
   *	Gareth Hughes <gareth@valinux.com>, May 2000
   */
  
  /*
   * Handle hardware traps and faults.
   */
  #include <linux/spinlock.h>
  #include <linux/kprobes.h>
  #include <linux/kdebug.h>
  #include <linux/nmi.h>
c9126b2ee   Don Zickus   x86, nmi: Create ...
17
18
19
  #include <linux/delay.h>
  #include <linux/hardirq.h>
  #include <linux/slab.h>
69c60c88e   Paul Gortmaker   x86: Fix files ex...
20
  #include <linux/export.h>
1d48922c1   Don Zickus   x86, nmi: Split o...
21

d48b0e173   Ingo Molnar   x86, nmi, drivers...
22
  #include <linux/mca.h>
1d48922c1   Don Zickus   x86, nmi: Split o...
23
24
25
26
27
28
29
  #if defined(CONFIG_EDAC)
  #include <linux/edac.h>
  #endif
  
  #include <linux/atomic.h>
  #include <asm/traps.h>
  #include <asm/mach_traps.h>
c9126b2ee   Don Zickus   x86, nmi: Create ...
30
  #include <asm/nmi.h>
6fd36ba02   Mathias Nyman   x86, ioapic: Only...
31
  #include <asm/x86_init.h>
c9126b2ee   Don Zickus   x86, nmi: Create ...
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
  
  #define NMI_MAX_NAMELEN	16
  /*
   * One registered NMI handler.  Instances are allocated by
   * register_nmi_handler() and linked into the list of the matching
   * nmi_desc[] entry.
   */
  struct nmiaction {
  	struct list_head list;	/* link in nmi_desc[type].head */
  	nmi_handler_t handler;	/* callback invoked from nmi_handle() */
  	unsigned int flags;	/* NMI_FLAG_FIRST is tested in __setup_nmi() */
  	char *name;		/* kstrndup() copy of devname; lookup key in __free_nmi() */
  };
  
  /* Handler list for one NMI type. */
  struct nmi_desc {
  	spinlock_t lock;	/* held by __setup_nmi()/__free_nmi() while editing the list */
  	struct list_head head;	/* struct nmiaction entries; walked under RCU in nmi_handle() */
  };
  
  /*
   * One descriptor per NMI type, looked up through nmi_to_desc().
   * Only entries 0 and 1 are explicitly initialized here.
   * NOTE(review): this presumably relies on NMI_MAX == 2 -- confirm against
   * asm/nmi.h; any further entry would get a zeroed (invalid) list head.
   */
  static struct nmi_desc nmi_desc[NMI_MAX] = 
  {
  	{
  		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
  		.head = LIST_HEAD_INIT(nmi_desc[0].head),
  	},
  	{
  		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
  		.head = LIST_HEAD_INIT(nmi_desc[1].head),
  	},
  
  };
1d48922c1   Don Zickus   x86, nmi: Split o...
58

efc3aac5f   Don Zickus   x86, nmi: Track N...
59
60
61
62
63
64
65
66
  /* Per-CPU counters describing how NMIs were disposed of. */
  struct nmi_stats {
  	unsigned int normal;	/* events claimed by NMI_LOCAL handlers (default_do_nmi) */
  	unsigned int unknown;	/* events accounted in unknown_nmi_error() */
  	unsigned int external;	/* NMIs attributed to the reason port (SERR/IOCHK) */
  	unsigned int swallow;	/* back-to-back NMIs silently swallowed */
  };
  
  static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
1d48922c1   Don Zickus   x86, nmi: Split o...
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
  /*
   * Incremented by stop_nmi() and decremented by restart_nmi();
   * do_nmi() skips default_do_nmi() while this is non-zero.
   */
  static int ignore_nmis;
  
  /*
   * Set to 1 by the "unknown_nmi_panic" __setup() option; when set,
   * unknown_nmi_error() panics instead of trying to continue.
   */
  int unknown_nmi_panic;
  /*
   * Prevent NMI reason port (0x61) being accessed simultaneously, can
   * only be used in NMI handler.
   */
  static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
  
  /*
   * __setup() callback for the "unknown_nmi_panic" option: enables
   * panicking on unknown NMIs.  @str is ignored.  Always returns 1.
   */
  static int __init setup_unknown_nmi_panic(char *str)
  {
  	unknown_nmi_panic = 1;
  	return 1;
  }
  __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
c9126b2ee   Don Zickus   x86, nmi: Create ...
82
  #define nmi_to_desc(type) (&nmi_desc[type])
b227e2339   Don Zickus   x86, nmi: Add in ...
83
  /*
   * Run every handler registered for @type and add up what they report.
   *
   * @type: index into nmi_desc[] selecting the handler list
   * @regs: register state, passed through unchanged to each handler
   * @b2b:  not used by this function
   *
   * Returns the sum of the handlers' return values, i.e. the total
   * number of NMI events they claimed.
   */
  static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
  {
  	struct nmi_desc *desc = nmi_to_desc(type);
  	struct nmiaction *a;
  	int handled = 0;
  
  	rcu_read_lock();
  
  	/*
  	 * NMIs are edge-triggered, which means if you have enough
  	 * of them concurrently, you can lose some because only one
  	 * can be latched at any given time.  Walk the whole list
  	 * to handle those situations.
  	 */
  	list_for_each_entry_rcu(a, &desc->head, list)
  		handled += a->handler(type, regs);
  
  	rcu_read_unlock();
  
  	/* return total number of NMI events handled */
  	return handled;
  }
  
  /*
   * Link @action into the handler list for @type.
   *
   * The list is modified with the descriptor lock held and with the RCU
   * list primitives, so nmi_handle() can walk it concurrently.
   *
   * Always returns 0.
   */
  static int __setup_nmi(unsigned int type, struct nmiaction *action)
  {
  	struct nmi_desc *desc = nmi_to_desc(type);
  	unsigned long flags;
  
  	spin_lock_irqsave(&desc->lock, flags);
  
  	/*
  	 * most handlers of type NMI_UNKNOWN never return because
  	 * they just assume the NMI is theirs.  Just a sanity check
  	 * to manage expectations
  	 */
  	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
  
  	/*
  	 * some handlers need to be executed first otherwise a fake
  	 * event confuses some handlers (kdump uses this flag)
  	 */
  	if (action->flags & NMI_FLAG_FIRST)
  		list_add_rcu(&action->list, &desc->head);
  	else
  		list_add_tail_rcu(&action->list, &desc->head);
  
  	spin_unlock_irqrestore(&desc->lock, flags);
  	return 0;
  }
  
  static struct nmiaction *__free_nmi(unsigned int type, const char *name)
  {
  	struct nmi_desc *desc = nmi_to_desc(type);
  	struct nmiaction *n;
  	unsigned long flags;
  
  	spin_lock_irqsave(&desc->lock, flags);
  
  	list_for_each_entry_rcu(n, &desc->head, list) {
  		/*
  		 * the name passed in to describe the nmi handler
  		 * is used as the lookup key
  		 */
  		if (!strcmp(n->name, name)) {
  			WARN(in_nmi(),
  				"Trying to free NMI (%s) from NMI context!
  ", n->name);
  			list_del_rcu(&n->list);
  			break;
  		}
  	}
  
  	spin_unlock_irqrestore(&desc->lock, flags);
  	synchronize_rcu();
  	return (n);
  }
  
  /*
   * Allocate an nmiaction for @handler and add it to the list for @type.
   *
   * @type:     NMI type, used to index the handler lists
   * @handler:  callback to run; must not be NULL
   * @nmiflags: stored in the action's flags
   * @devname:  name of the handler; at most NMI_MAX_NAMELEN characters
   *            of it are duplicated into the action
   *
   * Returns 0 on success, -EINVAL if @handler is NULL, -ENOMEM if either
   * allocation fails, or the error reported by __setup_nmi().
   */
  int register_nmi_handler(unsigned int type, nmi_handler_t handler,
  			unsigned long nmiflags, const char *devname)
  {
  	struct nmiaction *act;
  	int err;
  
  	if (handler == NULL)
  		return -EINVAL;
  
  	act = kzalloc(sizeof(*act), GFP_KERNEL);
  	if (act == NULL)
  		return -ENOMEM;
  
  	act->handler = handler;
  	act->flags = nmiflags;
  	act->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
  	if (act->name == NULL) {
  		kfree(act);
  		return -ENOMEM;
  	}
  
  	err = __setup_nmi(type, act);
  	if (err != 0) {
  		/* registration failed: release the name, then the action */
  		kfree(act->name);
  		kfree(act);
  	}
  
  	return err;
  }
  EXPORT_SYMBOL_GPL(register_nmi_handler);
  
  /*
   * Remove the handler registered under @name for @type and free the
   * action that __free_nmi() hands back, if any.
   */
  void unregister_nmi_handler(unsigned int type, const char *name)
  {
  	struct nmiaction *victim = __free_nmi(type, name);
  
  	if (!victim)
  		return;
  
  	kfree(victim->name);
  	kfree(victim);
  }
  
  EXPORT_SYMBOL_GPL(unregister_nmi_handler);
1d48922c1   Don Zickus   x86, nmi: Split o...
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
  static notrace __kprobes void
  pci_serr_error(unsigned char reason, struct pt_regs *regs)
  {
  	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.
  ",
  		 reason, smp_processor_id());
  
  	/*
  	 * On some machines, PCI SERR line is used to report memory
  	 * errors. EDAC makes use of it.
  	 */
  #if defined(CONFIG_EDAC)
  	if (edac_handler_set()) {
  		edac_atomic_assert_error();
  		return;
  	}
  #endif
  
  	if (panic_on_unrecovered_nmi)
  		panic("NMI: Not continuing");
  
  	pr_emerg("Dazed and confused, but trying to continue
  ");
  
  	/* Clear and disable the PCI SERR error line. */
  	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
  	outb(reason, NMI_REASON_PORT);
  }
  
  /*
   * Handle an NMI whose reason byte has NMI_REASON_IOCHK set.
   *
   * @reason: value read from the NMI reason port
   * @regs:   register state at the time of the NMI; dumped to the log
   *
   * Logs the error and the registers, panics if panic_on_io_nmi is set,
   * and otherwise sets the IOCHK clear bit in the reason port, busy-waits
   * while touching the NMI watchdog, then drops the clear bit again.
   */
  static notrace __kprobes void
  io_check_error(unsigned char reason, struct pt_regs *regs)
  {
  	unsigned long i;
  
  	pr_emerg(
  	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
  		 reason, smp_processor_id());
  	show_registers(regs);
  
  	if (panic_on_io_nmi)
  		panic("NMI IOCK error: Not continuing");
  
  	/* Re-enable the IOCK line, wait for a few seconds */
  	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
  	outb(reason, NMI_REASON_PORT);
  
  	/* keep the watchdog quiet during the delay loop */
  	i = 20000;
  	while (--i) {
  		touch_nmi_watchdog();
  		udelay(100);
  	}
  
  	reason &= ~NMI_REASON_CLEAR_IOCHK;
  	outb(reason, NMI_REASON_PORT);
  }
  
  static notrace __kprobes void
  unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
  {
9c48f1c62   Don Zickus   x86, nmi: Wire up...
267
  	int handled;
b227e2339   Don Zickus   x86, nmi: Add in ...
268
269
270
271
272
273
274
  	/*
  	 * Use 'false' as back-to-back NMIs are dealt with one level up.
  	 * Of course this makes having multiple 'unknown' handlers useless
  	 * as only the first one is ever run (unless it can actually determine
  	 * if it caused the NMI)
  	 */
  	handled = nmi_handle(NMI_UNKNOWN, regs, false);
efc3aac5f   Don Zickus   x86, nmi: Track N...
275
276
  	if (handled) {
  		__this_cpu_add(nmi_stats.unknown, handled);
1d48922c1   Don Zickus   x86, nmi: Split o...
277
  		return;
efc3aac5f   Don Zickus   x86, nmi: Track N...
278
279
280
  	}
  
  	__this_cpu_add(nmi_stats.unknown, 1);
1d48922c1   Don Zickus   x86, nmi: Split o...
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
  #ifdef CONFIG_MCA
  	/*
  	 * Might actually be able to figure out what the guilty party
  	 * is:
  	 */
  	if (MCA_bus) {
  		mca_handle_nmi();
  		return;
  	}
  #endif
  	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.
  ",
  		 reason, smp_processor_id());
  
  	pr_emerg("Do you have a strange power saving mode enabled?
  ");
  	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
  		panic("NMI: Not continuing");
  
  	pr_emerg("Dazed and confused, but trying to continue
  ");
  }
b227e2339   Don Zickus   x86, nmi: Add in ...
303
304
  /*
   * Per-CPU state for the back-to-back NMI logic in default_do_nmi():
   *  - swallow_nmi:  set when one NMI handled more than one event, so a
   *                  following back-to-back unknown NMI may be swallowed
   *  - last_nmi_rip: regs->ip seen by the previous NMI; cleared by
   *                  local_touch_nmi()
   */
  static DEFINE_PER_CPU(bool, swallow_nmi);
  static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
1d48922c1   Don Zickus   x86, nmi: Split o...
305
306
307
  static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
  {
  	unsigned char reason = 0;
9c48f1c62   Don Zickus   x86, nmi: Wire up...
308
  	int handled;
b227e2339   Don Zickus   x86, nmi: Add in ...
309
  	bool b2b = false;
1d48922c1   Don Zickus   x86, nmi: Split o...
310
311
312
313
314
315
  
  	/*
  	 * CPU-specific NMI must be processed before non-CPU-specific
  	 * NMI, otherwise we may lose it, because the CPU-specific
  	 * NMI can not be detected/processed on other CPUs.
  	 */
b227e2339   Don Zickus   x86, nmi: Add in ...
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
  
  	/*
  	 * Back-to-back NMIs are interesting because they can either
  	 * be two NMI or more than two NMIs (any thing over two is dropped
  	 * due to NMI being edge-triggered).  If this is the second half
  	 * of the back-to-back NMI, assume we dropped things and process
  	 * more handlers.  Otherwise reset the 'swallow' NMI behaviour
  	 */
  	if (regs->ip == __this_cpu_read(last_nmi_rip))
  		b2b = true;
  	else
  		__this_cpu_write(swallow_nmi, false);
  
  	__this_cpu_write(last_nmi_rip, regs->ip);
  
  	handled = nmi_handle(NMI_LOCAL, regs, b2b);
efc3aac5f   Don Zickus   x86, nmi: Track N...
332
  	__this_cpu_add(nmi_stats.normal, handled);
b227e2339   Don Zickus   x86, nmi: Add in ...
333
334
335
336
337
338
339
340
341
342
343
  	if (handled) {
  		/*
  		 * There are cases when a NMI handler handles multiple
  		 * events in the current NMI.  One of these events may
  		 * be queued for in the next NMI.  Because the event is
  		 * already handled, the next NMI will result in an unknown
  		 * NMI.  Instead lets flag this for a potential NMI to
  		 * swallow.
  		 */
  		if (handled > 1)
  			__this_cpu_write(swallow_nmi, true);
1d48922c1   Don Zickus   x86, nmi: Split o...
344
  		return;
b227e2339   Don Zickus   x86, nmi: Add in ...
345
  	}
1d48922c1   Don Zickus   x86, nmi: Split o...
346
347
348
  
  	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
  	raw_spin_lock(&nmi_reason_lock);
064a59b6d   Jacob Pan   x86/mrst: Avoid r...
349
  	reason = x86_platform.get_nmi_reason();
1d48922c1   Don Zickus   x86, nmi: Split o...
350
351
352
353
354
355
356
357
358
359
360
361
362
  
  	if (reason & NMI_REASON_MASK) {
  		if (reason & NMI_REASON_SERR)
  			pci_serr_error(reason, regs);
  		else if (reason & NMI_REASON_IOCHK)
  			io_check_error(reason, regs);
  #ifdef CONFIG_X86_32
  		/*
  		 * Reassert NMI in case it became active
  		 * meanwhile as it's edge-triggered:
  		 */
  		reassert_nmi();
  #endif
efc3aac5f   Don Zickus   x86, nmi: Track N...
363
  		__this_cpu_add(nmi_stats.external, 1);
1d48922c1   Don Zickus   x86, nmi: Split o...
364
365
366
367
  		raw_spin_unlock(&nmi_reason_lock);
  		return;
  	}
  	raw_spin_unlock(&nmi_reason_lock);
b227e2339   Don Zickus   x86, nmi: Add in ...
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
  	/*
  	 * Only one NMI can be latched at a time.  To handle
  	 * this we may process multiple nmi handlers at once to
  	 * cover the case where an NMI is dropped.  The downside
  	 * to this approach is we may process an NMI prematurely,
  	 * while its real NMI is sitting latched.  This will cause
  	 * an unknown NMI on the next run of the NMI processing.
  	 *
  	 * We tried to flag that condition above, by setting the
  	 * swallow_nmi flag when we process more than one event.
  	 * This condition is also only present on the second half
  	 * of a back-to-back NMI, so we flag that condition too.
  	 *
  	 * If both are true, we assume we already processed this
  	 * NMI previously and we swallow it.  Otherwise we reset
  	 * the logic.
  	 *
  	 * There are scenarios where we may accidentally swallow
  	 * a 'real' unknown NMI.  For example, while processing
  	 * a perf NMI another perf NMI comes in along with a
  	 * 'real' unknown NMI.  These two NMIs get combined into
  	 * one (as descibed above).  When the next NMI gets
  	 * processed, it will be flagged by perf as handled, but
  	 * noone will know that there was a 'real' unknown NMI sent
  	 * also.  As a result it gets swallowed.  Or if the first
  	 * perf NMI returns two events handled then the second
  	 * NMI will get eaten by the logic below, again losing a
  	 * 'real' unknown NMI.  But this is the best we can do
  	 * for now.
  	 */
  	if (b2b && __this_cpu_read(swallow_nmi))
efc3aac5f   Don Zickus   x86, nmi: Track N...
399
  		__this_cpu_add(nmi_stats.swallow, 1);
b227e2339   Don Zickus   x86, nmi: Add in ...
400
401
  	else
  		unknown_nmi_error(reason, regs);
1d48922c1   Don Zickus   x86, nmi: Split o...
402
  }
ccd49c239   Steven Rostedt   x86: Allow NMIs t...
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
  /*
   * NMIs can hit breakpoints which will cause it to lose its
   * NMI context with the CPU when the breakpoint does an iret.
   */
  #ifdef CONFIG_X86_32
  /*
   * For i386, NMIs use the same stack as the kernel, and we can
   * add a workaround to the iret problem in C. Simply have 3 states
   * the NMI can be in.
   *
   *  1) not running
   *  2) executing
   *  3) latched
   *
   * When no NMI is in progress, it is in the "not running" state.
   * When an NMI comes in, it goes into the "executing" state.
   * Normally, if another NMI is triggered, it does not interrupt
   * the running NMI and the HW will simply latch it so that when
   * the first NMI finishes, it will restart the second NMI.
   * (Note, the latch is binary, thus multiple NMIs triggering,
   *  when one is running, are ignored. Only one NMI is restarted.)
   *
   * If an NMI hits a breakpoint that executes an iret, another
   * NMI can preempt it. We do not want to allow this new NMI
   * to run, but we want to execute it when the first one finishes.
   * We set the state to "latched", and the first NMI will perform
   * a cmpxchg on the state, and if it doesn't successfully
   * reset the state to "not running" it will restart the next
   * NMI.
   */
  /* Per-CPU NMI nesting state used by the i386 pre/postprocess macros. */
  enum nmi_states {
  	NMI_NOT_RUNNING,	/* no NMI in progress */
  	NMI_EXECUTING,		/* an NMI is being processed */
  	NMI_LATCHED,		/* another NMI arrived while one was being processed */
  };
  static DEFINE_PER_CPU(enum nmi_states, nmi_state);
  
  /*
   * i386 entry hook.  If this CPU's nmi_state is anything other than
   * NMI_NOT_RUNNING, record the new NMI as NMI_LATCHED and return from
   * the enclosing function.  Otherwise move to NMI_EXECUTING.
   *
   * This macro defines the 'nmi_restart' label that
   * nmi_nesting_postprocess() jumps to, and contains a bare 'return', so
   * it must be expanded exactly once in a function returning void.
   * @regs is not used.
   */
  #define nmi_nesting_preprocess(regs)					\
  	do {								\
  		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
  			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
  			return;						\
  		}							\
  	nmi_restart:							\
  		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
  	} while (0)
  
  /*
   * i386 exit hook.  Try to switch nmi_state from NMI_EXECUTING back to
   * NMI_NOT_RUNNING with cmpxchg.  If the state was not NMI_EXECUTING
   * (i.e. it was changed while this NMI ran), jump to the 'nmi_restart'
   * label defined by nmi_nesting_preprocess() to process the NMI again.
   */
  #define nmi_nesting_postprocess()					\
  	do {								\
  		if (cmpxchg(&__get_cpu_var(nmi_state),			\
  		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
  			goto nmi_restart;				\
  	} while (0)
  #else /* x86_64 */
  /*
   * In x86_64 things are a bit more difficult. This has the same problem
   * where an NMI hitting a breakpoint that calls iret will remove the
   * NMI context, allowing a nested NMI to enter. What makes this more
   * difficult is that both NMIs and breakpoints have their own stack.
   * When a new NMI or breakpoint is executed, the stack is set to a fixed
   * point. If an NMI is nested, it will have its stack set at that same
   * fixed address that the first NMI had, and will start corrupting the
   * stack. This is handled in entry_64.S, but the same problem exists with
   * the breakpoint stack.
   *
   * If a breakpoint is being processed, and the debug stack is being used,
   * if an NMI comes in and also hits a breakpoint, the stack pointer
   * will be set to the same fixed address as the breakpoint that was
   * interrupted, causing that stack to be corrupted. To handle this case,
   * check if the stack that was interrupted is the debug stack, and if
   * so, change the IDT so that new breakpoints will use the current stack
   * and not switch to the fixed address. On return of the NMI, switch back
   * to the original IDT.
   */
  static DEFINE_PER_CPU(int, update_debug_stack);
228bdaa95   Steven Rostedt   x86: Keep current...
478

ccd49c239   Steven Rostedt   x86: Allow NMIs t...
479
480
  static inline void nmi_nesting_preprocess(struct pt_regs *regs)
  {
228bdaa95   Steven Rostedt   x86: Keep current...
481
482
483
484
485
486
487
488
  	/*
  	 * If we interrupted a breakpoint, it is possible that
  	 * the nmi handler will have breakpoints too. We need to
  	 * change the IDT such that breakpoints that happen here
  	 * continue to use the NMI stack.
  	 */
  	if (unlikely(is_debug_stack(regs->sp))) {
  		debug_stack_set_zero();
ccd49c239   Steven Rostedt   x86: Allow NMIs t...
489
  		__get_cpu_var(update_debug_stack) = 1;
228bdaa95   Steven Rostedt   x86: Keep current...
490
  	}
ccd49c239   Steven Rostedt   x86: Allow NMIs t...
491
492
493
494
495
496
497
498
499
500
501
502
503
  }
  
  /*
   * x86_64 exit hook: if the per-CPU update_debug_stack flag is set,
   * call debug_stack_reset() and clear the flag.
   *
   * The flag has to be cleared here; otherwise, once it has been set,
   * debug_stack_reset() would be called again at the end of every later
   * NMI on this CPU even though debug_stack_set_zero() was not called
   * for it.
   */
  static inline void nmi_nesting_postprocess(void)
  {
  	if (unlikely(__get_cpu_var(update_debug_stack))) {
  		debug_stack_reset();
  		__get_cpu_var(update_debug_stack) = 0;
  	}
  }
  #endif
  
  /*
   * NMI entry point.
   *
   * @regs:       register state at the time of the NMI
   * @error_code: unused
   *
   * Brackets the real work with the nesting pre/postprocess hooks and
   * nmi_enter()/nmi_exit(), counts the NMI, and calls default_do_nmi()
   * unless NMIs are currently being ignored (see stop_nmi()).
   */
  dotraplinkage notrace __kprobes void
  do_nmi(struct pt_regs *regs, long error_code)
  {
  	nmi_nesting_preprocess(regs);
  
  	nmi_enter();
  
  	inc_irq_stat(__nmi_count);
  
  	if (!ignore_nmis)
  		default_do_nmi(regs);
  
  	nmi_exit();
  
  	/* On i386, may loop back to preprocess */
  	nmi_nesting_postprocess();
  }
  
  /*
   * Ask do_nmi() to stop dispatching NMIs: it skips default_do_nmi()
   * for as long as ignore_nmis is non-zero.  Undone by restart_nmi().
   */
  void stop_nmi(void)
  {
  	ignore_nmis += 1;
  }
  
  /*
   * Undo one stop_nmi() call by decrementing ignore_nmis; do_nmi()
   * dispatches again once the count is back to zero.
   */
  void restart_nmi(void)
  {
  	ignore_nmis -= 1;
  }
b227e2339   Don Zickus   x86, nmi: Add in ...
526
527
528
529
530
531
  
  /*
   * reset the back-to-back NMI logic
   *
   * Clears this CPU's last_nmi_rip.  default_do_nmi() treats an NMI as
   * back-to-back when regs->ip equals last_nmi_rip, so the stored value
   * is dropped here.
   */
  void local_touch_nmi(void)
  {
  	__this_cpu_write(last_nmi_rip, 0);
  }