Commit 228bdaa95fb830e08b6acd1afd4d2c55093cabfa

Authored by Steven Rostedt
Committed by Steven Rostedt
1 parent 3f3c8b8c4b

x86: Keep current stack in NMI breakpoints

We want to allow NMI handlers to hit breakpoints so that stop_machine
can be removed from ftrace, kprobes and jump_labels. But if an NMI
interrupts breakpoint processing that is already in progress and then
triggers a breakpoint itself, the CPU will switch to the breakpoint
stack again and corrupt the data that the interrupted breakpoint
handler had saved there.

Instead, have the NMI handler check whether it interrupted breakpoint
processing by testing whether the stack it interrupted is a breakpoint
stack. If it is, load a special IDT whose debug-exception gates have
their IST cleared, so that breakpoints taken in kernel context keep
using the current stack. When the NMI is done, restore the original
IDT.

This way, if the NMI does trigger a breakpoint, that breakpoint keeps
using the same stack and does not stomp on the data of the breakpoint
it interrupted.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
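
The scenario being fixed, sketched below in C comment form. The detail
that the regular #DB/#BP gates use the DEBUG_STACK IST index is an
assumption drawn from the era's trap setup, not stated in this commit;
everything else follows from the diff below.

/*
 * Without this patch:
 *
 *   do_int3()               CPU switched to the per-CPU debug IST stack
 *     <- NMI fires ->
 *     do_nmi()              runs on the NMI IST stack
 *       int3 hit again      the #BP gate still names the debug IST, so
 *                           the CPU jumps back to the *top* of the same
 *                           debug stack and overwrites the frame saved
 *                           for do_int3()
 *
 * With this patch, do_nmi() sees that regs->sp lies inside the debug
 * stack, loads nmi_idt_table (whose vectors 1 and 3 are packed with
 * ist == 0 by set_nmi_gate()), so a nested #DB/#BP stays on the current
 * stack, and restores idt_descr before returning.
 */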

Showing 6 changed files with 65 additions and 0 deletions

arch/x86/include/asm/desc.h

@@ -35,6 +35,8 @@
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -306,6 +308,16 @@
 	desc->limit0 = limit & 0xffff;
 	desc->limit = (limit >> 16) & 0xf;
 }
+
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
 
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
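
The gate packed by set_nmi_gate() differs from a regular debug/breakpoint
gate only in the ist argument passed to pack_gate(). A minimal sketch of
the contrast; the DEBUG_STACK line reflects how the normal #DB/#BP gates
are installed elsewhere in traps.c at this time, which is an assumption
rather than part of this diff:

	/* regular gate: hardware always switches to the fixed debug IST stack */
	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, DEBUG_STACK, __KERNEL_CS);
	/* set_nmi_gate(): ist == 0, so an exception taken in kernel mode
	 * stays on whatever stack is current */
	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);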
arch/x86/include/asm/processor.h

@@ -402,6 +402,9 @@
 DECLARE_PER_CPU(unsigned int, irq_count);
 extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
@@ -416,6 +419,9 @@
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
 #endif	/* X86_64 */
 
 extern unsigned int xstate_size;
arch/x86/kernel/cpu/common.c

@@ -1026,6 +1026,8 @@
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1090,6 +1092,24 @@
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+
+int is_debug_stack(unsigned long addr)
+{
+	return addr <= __get_cpu_var(debug_stack_addr) &&
+		addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ);
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else	/* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1208,6 +1228,8 @@
 		estacks += exception_stack_sizes[v];
 		oist->ist[v] = t->x86_tss.ist[v] =
 			(unsigned long)estacks;
+		if (v == DEBUG_STACK-1)
+			per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 	}
 }
 
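debug_stack_addr holds the top of the per-CPU DEBUG_STACK exception stack,
so is_debug_stack() tests whether an address falls in the half-open range
(top - DEBUG_STKSZ, top]. A worked example with made-up numbers (the
addresses and the 8 KiB DEBUG_STKSZ value are illustrative assumptions):

	/* suppose this CPU's debug_stack_addr == 0xffff880000008000 and
	 * DEBUG_STKSZ == 0x2000 (8 KiB), so the valid range is
	 * (0xffff880000006000, 0xffff880000008000] */
	is_debug_stack(0xffff880000007f30);	/* -> 1, sp is on the debug stack   */
	is_debug_stack(0xffff880000006000);	/* -> 0, just below the range       */
	is_debug_stack(0xffff880000009000);	/* -> 0, some other stack entirely  */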
arch/x86/kernel/head_64.S

@@ -417,6 +417,10 @@
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
+	.align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+	.skip IDT_ENTRIES * 16
+
 	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
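The new table is reserved with exactly the same size as idt_table.
Assuming IDT_ENTRIES == NR_VECTORS == 256 (the usual x86 values), the
space reserved here and the limit used for nmi_idt_descr in common.c
line up as follows:

	256 entries * 16 bytes per 64-bit gate descriptor = 4096 bytes reserved
	limit in nmi_idt_descr = NR_VECTORS * 16 - 1 = 4095   /* a descriptor limit is "bytes - 1" */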
arch/x86/kernel/nmi.c

@@ -408,6 +408,18 @@
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+	int update_debug_stack = 0;
+
+	/*
+	 * If we interrupted a breakpoint, it is possible that
+	 * the nmi handler will have breakpoints too. We need to
+	 * change the IDT such that breakpoints that happen here
+	 * continue to use the NMI stack.
+	 */
+	if (unlikely(is_debug_stack(regs->sp))) {
+		debug_stack_set_zero();
+		update_debug_stack = 1;
+	}
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -416,6 +428,9 @@
 	default_do_nmi(regs);
 
 	nmi_exit();
+
+	if (unlikely(update_debug_stack))
+		debug_stack_reset();
 }
 
 void stop_nmi(void)
arch/x86/kernel/traps.c

@@ -723,5 +723,11 @@
 	cpu_init();
 
 	x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	set_nmi_gate(1, &debug);
+	set_nmi_gate(3, &int3);
+#endif
 }
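
After the memcpy the two tables are byte-for-byte identical; only vectors
1 (#DB) and 3 (#BP) are then repacked with an IST of zero. A minimal
sanity-check sketch, assuming the x86_64 gate_desc layout of this era
with its 3-bit ist bitfield (not part of the commit):

	#ifdef CONFIG_X86_64
		BUG_ON(nmi_idt_table[1].ist != 0);	/* #DB gate must not switch stacks */
		BUG_ON(nmi_idt_table[3].ist != 0);	/* #BP gate must not switch stacks */
	#endif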