Commit 83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e
Exists in master and in 38 other branches
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
29 changed files:
- Documentation/kernel-parameters.txt
- arch/x86/include/asm/debugreg.h
- arch/x86/include/asm/desc.h
- arch/x86/kernel/cpu/common.c
- arch/x86/kernel/entry_64.S
- arch/x86/kernel/head_64.S
- arch/x86/kernel/nmi.c
- arch/x86/kernel/traps.c
- include/linux/compiler-gcc.h
- include/linux/ftrace.h
- kernel/trace/ftrace.c
- kernel/trace/trace_events_filter.c
- kernel/trace/trace_stack.c
- scripts/recordmcount.h
- tools/perf/Documentation/perf-list.txt
- tools/perf/MANIFEST
- tools/perf/builtin-annotate.c
- tools/perf/builtin-kmem.c
- tools/perf/builtin-kvm.c
- tools/perf/builtin-script.c
- tools/perf/builtin-test.c
- tools/perf/builtin-top.c
- tools/perf/util/evlist.c
- tools/perf/util/hist.c
- tools/perf/util/hist.h
- tools/perf/util/parse-events.c
- tools/perf/util/trace-event-info.c
- tools/perf/util/util.c
- tools/perf/util/util.h
Documentation/kernel-parameters.txt
... | ... | @@ -2475,6 +2475,14 @@ |
2475 | 2475 | stacktrace [FTRACE] |
2476 | 2476 | Enabled the stack tracer on boot up. |
2477 | 2477 | |
2478 | + stacktrace_filter=[function-list] | |
2479 | + [FTRACE] Limit the functions that the stack tracer | |
2480 | + will trace at boot up. function-list is a comma separated | |
2481 | + list of functions. This list can be changed at run | |
2482 | + time by the stack_trace_filter file in the debugfs | |
2483 | + tracing directory. Note, this enables stack tracing | |
2484 | + and the stacktrace above is not needed. | |
2485 | + | |
2478 | 2486 | sti= [PARISC,HW] |
2479 | 2487 | Format: <num> |
2480 | 2488 | Set the STI (builtin display/keyboard on the HP-PARISC |
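A quick illustration of the new parameter (my own example, not part of the patch): booting with

    stacktrace_filter=kmem_cache_alloc,kmem_cache_free

starts the stack tracer restricted to those two functions, and the list can later be changed at run time through the stack_trace_filter file in the debugfs tracing directory (typically /sys/kernel/debug/tracing/stack_trace_filter), as the added text describes.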
arch/x86/include/asm/debugreg.h
... | ... | @@ -101,6 +101,28 @@ |
101 | 101 | |
102 | 102 | extern void hw_breakpoint_restore(void); |
103 | 103 | |
104 | +#ifdef CONFIG_X86_64 | |
105 | +DECLARE_PER_CPU(int, debug_stack_usage); | |
106 | +static inline void debug_stack_usage_inc(void) | |
107 | +{ | |
108 | + __get_cpu_var(debug_stack_usage)++; | |
109 | +} | |
110 | +static inline void debug_stack_usage_dec(void) | |
111 | +{ | |
112 | + __get_cpu_var(debug_stack_usage)--; | |
113 | +} | |
114 | +int is_debug_stack(unsigned long addr); | |
115 | +void debug_stack_set_zero(void); | |
116 | +void debug_stack_reset(void); | |
117 | +#else /* !X86_64 */ | |
118 | +static inline int is_debug_stack(unsigned long addr) { return 0; } | |
119 | +static inline void debug_stack_set_zero(void) { } | |
120 | +static inline void debug_stack_reset(void) { } | |
121 | +static inline void debug_stack_usage_inc(void) { } | |
122 | +static inline void debug_stack_usage_dec(void) { } | |
123 | +#endif /* X86_64 */ | |
124 | + | |
125 | + | |
104 | 126 | #endif /* __KERNEL__ */ |
105 | 127 | |
106 | 128 | #endif /* _ASM_X86_DEBUGREG_H */ |
arch/x86/include/asm/desc.h
... | ... | @@ -35,6 +35,8 @@ |
35 | 35 | |
36 | 36 | extern struct desc_ptr idt_descr; |
37 | 37 | extern gate_desc idt_table[]; |
38 | +extern struct desc_ptr nmi_idt_descr; | |
39 | +extern gate_desc nmi_idt_table[]; | |
38 | 40 | |
39 | 41 | struct gdt_page { |
40 | 42 | struct desc_struct gdt[GDT_ENTRIES]; |
... | ... | @@ -306,6 +308,16 @@ |
306 | 308 | desc->limit0 = limit & 0xffff; |
307 | 309 | desc->limit = (limit >> 16) & 0xf; |
308 | 310 | } |
311 | + | |
312 | +#ifdef CONFIG_X86_64 | |
313 | +static inline void set_nmi_gate(int gate, void *addr) | |
314 | +{ | |
315 | + gate_desc s; | |
316 | + | |
317 | + pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); | |
318 | + write_idt_entry(nmi_idt_table, gate, &s); | |
319 | +} | |
320 | +#endif | |
309 | 321 | |
310 | 322 | static inline void _set_gate(int gate, unsigned type, void *addr, |
311 | 323 | unsigned dpl, unsigned ist, unsigned seg) |
arch/x86/kernel/cpu/common.c
... | ... | @@ -1021,6 +1021,8 @@ |
1021 | 1021 | |
1022 | 1022 | #ifdef CONFIG_X86_64 |
1023 | 1023 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
1024 | +struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1, | |
1025 | + (unsigned long) nmi_idt_table }; | |
1024 | 1026 | |
1025 | 1027 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
1026 | 1028 | irq_stack_union) __aligned(PAGE_SIZE); |
... | ... | @@ -1085,6 +1087,26 @@ |
1085 | 1087 | */ |
1086 | 1088 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
1087 | 1089 | |
1090 | +static DEFINE_PER_CPU(unsigned long, debug_stack_addr); | |
1091 | +DEFINE_PER_CPU(int, debug_stack_usage); | |
1092 | + | |
1093 | +int is_debug_stack(unsigned long addr) | |
1094 | +{ | |
1095 | + return __get_cpu_var(debug_stack_usage) || | |
1096 | + (addr <= __get_cpu_var(debug_stack_addr) && | |
1097 | + addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | |
1098 | +} | |
1099 | + | |
1100 | +void debug_stack_set_zero(void) | |
1101 | +{ | |
1102 | + load_idt((const struct desc_ptr *)&nmi_idt_descr); | |
1103 | +} | |
1104 | + | |
1105 | +void debug_stack_reset(void) | |
1106 | +{ | |
1107 | + load_idt((const struct desc_ptr *)&idt_descr); | |
1108 | +} | |
1109 | + | |
1088 | 1110 | #else /* CONFIG_X86_64 */ |
1089 | 1111 | |
1090 | 1112 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
... | ... | @@ -1212,6 +1234,8 @@ |
1212 | 1234 | estacks += exception_stack_sizes[v]; |
1213 | 1235 | oist->ist[v] = t->x86_tss.ist[v] = |
1214 | 1236 | (unsigned long)estacks; |
1237 | + if (v == DEBUG_STACK-1) | |
1238 | + per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; | |
1215 | 1239 | } |
1216 | 1240 | } |
1217 | 1241 |
arch/x86/kernel/entry_64.S
... | ... | @@ -1480,61 +1480,213 @@ |
1480 | 1480 | CFI_ENDPROC |
1481 | 1481 | END(error_exit) |
1482 | 1482 | |
1483 | +/* | |
1484 | + * Test if a given stack is an NMI stack or not. | |
1485 | + */ | |
1486 | + .macro test_in_nmi reg stack nmi_ret normal_ret | |
1487 | + cmpq %\reg, \stack | |
1488 | + ja \normal_ret | |
1489 | + subq $EXCEPTION_STKSZ, %\reg | |
1490 | + cmpq %\reg, \stack | |
1491 | + jb \normal_ret | |
1492 | + jmp \nmi_ret | |
1493 | + .endm | |
1483 | 1494 | |
1484 | 1495 | /* runs on exception stack */ |
1485 | 1496 | ENTRY(nmi) |
1486 | 1497 | INTR_FRAME |
1487 | 1498 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1488 | - pushq_cfi $-1 | |
1499 | + /* | |
1500 | + * We allow breakpoints in NMIs. If a breakpoint occurs, then | |
1501 | + * the iretq it performs will take us out of NMI context. | |
1502 | + * This means that we can have nested NMIs where the next | |
1503 | + * NMI is using the top of the stack of the previous NMI. We | |
1504 | + * can't let it execute because the nested NMI will corrupt the | |
1505 | + * stack of the previous NMI. NMI handlers are not re-entrant | |
1506 | + * anyway. | |
1507 | + * | |
1508 | + * To handle this case we do the following: | |
1509 | + * Check a special location on the stack that contains | 
1510 | + * a variable that is set when NMIs are executing. | |
1511 | + * The interrupted task's stack is also checked to see if it | |
1512 | + * is an NMI stack. | |
1513 | + * If the variable is not set and the stack is not the NMI | |
1514 | + * stack then: | |
1515 | + * o Set the special variable on the stack | |
1516 | + * o Copy the interrupt frame into a "saved" location on the stack | |
1517 | + * o Copy the interrupt frame into a "copy" location on the stack | |
1518 | + * o Continue processing the NMI | |
1519 | + * If the variable is set or the previous stack is the NMI stack: | |
1520 | + * o Modify the "copy" location to jump to the repeat_nmi | 
1521 | + * o return back to the first NMI | |
1522 | + * | |
1523 | + * Now on exit of the first NMI, we first clear the stack variable | |
1524 | + * The NMI stack will tell any nested NMIs at that point that it is | |
1525 | + * nested. Then we pop the stack normally with iret, and if there was | |
1526 | + * a nested NMI that updated the copy interrupt stack frame, a | |
1527 | + * jump will be made to the repeat_nmi code that will handle the second | |
1528 | + * NMI. | |
1529 | + */ | |
1530 | + | |
1531 | + /* Use %rdx as our temp variable throughout */ | 
1532 | + pushq_cfi %rdx | |
1533 | + | |
1534 | + /* | |
1535 | + * Check the special variable on the stack to see if NMIs are | |
1536 | + * executing. | |
1537 | + */ | |
1538 | + cmp $1, -8(%rsp) | |
1539 | + je nested_nmi | |
1540 | + | |
1541 | + /* | |
1542 | + * Now test if the previous stack was an NMI stack. | |
1543 | + * We need the double check. We check the NMI stack to satisfy the | |
1544 | + * race when the first NMI clears the variable before returning. | |
1545 | + * We check the variable because the first NMI could be in a | |
1546 | + * breakpoint routine using a breakpoint stack. | |
1547 | + */ | |
1548 | + lea 6*8(%rsp), %rdx | |
1549 | + test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | |
1550 | + | |
1551 | +nested_nmi: | |
1552 | + /* | |
1553 | + * Do nothing if we interrupted the fixup in repeat_nmi. | |
1554 | + * It's about to repeat the NMI handler, so we are fine | |
1555 | + * with ignoring this one. | |
1556 | + */ | |
1557 | + movq $repeat_nmi, %rdx | |
1558 | + cmpq 8(%rsp), %rdx | |
1559 | + ja 1f | |
1560 | + movq $end_repeat_nmi, %rdx | |
1561 | + cmpq 8(%rsp), %rdx | |
1562 | + ja nested_nmi_out | |
1563 | + | |
1564 | +1: | |
1565 | + /* Set up the interrupted NMI's stack to jump to repeat_nmi */ | 
1566 | + leaq -6*8(%rsp), %rdx | |
1567 | + movq %rdx, %rsp | |
1568 | + CFI_ADJUST_CFA_OFFSET 6*8 | |
1569 | + pushq_cfi $__KERNEL_DS | |
1570 | + pushq_cfi %rdx | |
1571 | + pushfq_cfi | |
1572 | + pushq_cfi $__KERNEL_CS | |
1573 | + pushq_cfi $repeat_nmi | |
1574 | + | |
1575 | + /* Put stack back */ | |
1576 | + addq $(11*8), %rsp | |
1577 | + CFI_ADJUST_CFA_OFFSET -11*8 | |
1578 | + | |
1579 | +nested_nmi_out: | |
1580 | + popq_cfi %rdx | |
1581 | + | |
1582 | + /* No need to check faults here */ | |
1583 | + INTERRUPT_RETURN | |
1584 | + | |
1585 | +first_nmi: | |
1586 | + /* | |
1587 | + * Because nested NMIs will use the pushed location that we | |
1588 | + * stored in rdx, we must keep that space available. | |
1589 | + * Here's what our stack frame will look like: | |
1590 | + * +-------------------------+ | |
1591 | + * | original SS | | |
1592 | + * | original Return RSP | | |
1593 | + * | original RFLAGS | | |
1594 | + * | original CS | | |
1595 | + * | original RIP | | |
1596 | + * +-------------------------+ | |
1597 | + * | temp storage for rdx | | |
1598 | + * +-------------------------+ | |
1599 | + * | NMI executing variable | | |
1600 | + * +-------------------------+ | |
1601 | + * | Saved SS | | |
1602 | + * | Saved Return RSP | | |
1603 | + * | Saved RFLAGS | | |
1604 | + * | Saved CS | | |
1605 | + * | Saved RIP | | |
1606 | + * +-------------------------+ | |
1607 | + * | copied SS | | |
1608 | + * | copied Return RSP | | |
1609 | + * | copied RFLAGS | | |
1610 | + * | copied CS | | |
1611 | + * | copied RIP | | |
1612 | + * +-------------------------+ | |
1613 | + * | pt_regs | | |
1614 | + * +-------------------------+ | |
1615 | + * | |
1616 | + * The saved RIP is used to fix up the copied RIP that a nested | |
1617 | + * NMI may zero out. The original stack frame and the temp storage | |
1618 | + * are also used by nested NMIs and cannot be trusted on exit. | 
1619 | + */ | |
1620 | + /* Set the NMI executing variable on the stack. */ | |
1621 | + pushq_cfi $1 | |
1622 | + | |
1623 | + /* Copy the stack frame to the Saved frame */ | |
1624 | + .rept 5 | |
1625 | + pushq_cfi 6*8(%rsp) | |
1626 | + .endr | |
1627 | + | |
1628 | + /* Make another copy, this one may be modified by nested NMIs */ | |
1629 | + .rept 5 | |
1630 | + pushq_cfi 4*8(%rsp) | |
1631 | + .endr | |
1632 | + | |
1633 | + /* Do not pop rdx, nested NMIs will corrupt it */ | |
1634 | + movq 11*8(%rsp), %rdx | |
1635 | + | |
1636 | + /* | |
1637 | + * Everything below this point can be preempted by a nested | |
1638 | + * NMI if the first NMI took an exception. Repeated NMIs | |
1639 | + * caused by an exception and nested NMI will start here, and | |
1640 | + * can still be preempted by another NMI. | |
1641 | + */ | |
1642 | +restart_nmi: | |
1643 | + pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | |
1489 | 1644 | subq $ORIG_RAX-R15, %rsp |
1490 | 1645 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1646 | + /* | |
1647 | + * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit | |
1648 | + * as we should not be calling schedule in NMI context. | |
1649 | + * Even with normal interrupts enabled. An NMI should not be | |
1650 | + * setting NEED_RESCHED or anything that normal interrupts and | |
1651 | + * exceptions might do. | |
1652 | + */ | |
1491 | 1653 | call save_paranoid |
1492 | 1654 | DEFAULT_FRAME 0 |
1493 | 1655 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ |
1494 | 1656 | movq %rsp,%rdi |
1495 | 1657 | movq $-1,%rsi |
1496 | 1658 | call do_nmi |
1497 | -#ifdef CONFIG_TRACE_IRQFLAGS | |
1498 | - /* paranoidexit; without TRACE_IRQS_OFF */ | |
1499 | - /* ebx: no swapgs flag */ | |
1500 | - DISABLE_INTERRUPTS(CLBR_NONE) | |
1501 | 1659 | testl %ebx,%ebx /* swapgs needed? */ |
1502 | 1660 | jnz nmi_restore |
1503 | - testl $3,CS(%rsp) | |
1504 | - jnz nmi_userspace | |
1505 | 1661 | nmi_swapgs: |
1506 | 1662 | SWAPGS_UNSAFE_STACK |
1507 | 1663 | nmi_restore: |
1508 | 1664 | RESTORE_ALL 8 |
1665 | + /* Clear the NMI executing stack variable */ | |
1666 | + movq $0, 10*8(%rsp) | |
1509 | 1667 | jmp irq_return |
1510 | -nmi_userspace: | |
1511 | - GET_THREAD_INFO(%rcx) | |
1512 | - movl TI_flags(%rcx),%ebx | |
1513 | - andl $_TIF_WORK_MASK,%ebx | |
1514 | - jz nmi_swapgs | |
1515 | - movq %rsp,%rdi /* &pt_regs */ | |
1516 | - call sync_regs | |
1517 | - movq %rax,%rsp /* switch stack for scheduling */ | |
1518 | - testl $_TIF_NEED_RESCHED,%ebx | |
1519 | - jnz nmi_schedule | |
1520 | - movl %ebx,%edx /* arg3: thread flags */ | |
1521 | - ENABLE_INTERRUPTS(CLBR_NONE) | |
1522 | - xorl %esi,%esi /* arg2: oldset */ | |
1523 | - movq %rsp,%rdi /* arg1: &pt_regs */ | |
1524 | - call do_notify_resume | |
1525 | - DISABLE_INTERRUPTS(CLBR_NONE) | |
1526 | - jmp nmi_userspace | |
1527 | -nmi_schedule: | |
1528 | - ENABLE_INTERRUPTS(CLBR_ANY) | |
1529 | - call schedule | |
1530 | - DISABLE_INTERRUPTS(CLBR_ANY) | |
1531 | - jmp nmi_userspace | |
1532 | 1668 | CFI_ENDPROC |
1533 | -#else | |
1534 | - jmp paranoid_exit | |
1535 | - CFI_ENDPROC | |
1536 | -#endif | |
1537 | 1669 | END(nmi) |
1670 | + | |
1671 | + /* | |
1672 | + * If an NMI hit an iret because of an exception or breakpoint, | |
1673 | + * it can lose its NMI context, and a nested NMI may come in. | |
1674 | + * In that case, the nested NMI will change the preempted NMI's | |
1675 | + * stack to jump to here when it does the final iret. | |
1676 | + */ | |
1677 | +repeat_nmi: | |
1678 | + INTR_FRAME | |
1679 | + /* Update the stack variable to say we are still in NMI */ | |
1680 | + movq $1, 5*8(%rsp) | |
1681 | + | |
1682 | + /* copy the saved stack back to copy stack */ | |
1683 | + .rept 5 | |
1684 | + pushq_cfi 4*8(%rsp) | |
1685 | + .endr | |
1686 | + | |
1687 | + jmp restart_nmi | |
1688 | + CFI_ENDPROC | |
1689 | +end_repeat_nmi: | |
1538 | 1690 | |
1539 | 1691 | ENTRY(ignore_sysret) |
1540 | 1692 | CFI_STARTPROC |
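The long comment block above describes the nested-NMI scheme in prose. As a rough C model of that control flow (an illustration only: the struct and field names below are invented here, and the real logic is the assembly in this hunk):

    /*
     * Rough C model of the nested-NMI handling described above.
     * Illustration only; not kernel code.
     */
    struct nmi_frame { unsigned long ss, rsp, rflags, cs, rip; };

    struct nmi_scratch {
            long executing;            /* the "NMI executing variable"        */
            struct nmi_frame saved;    /* pristine copy of the hardware frame */
            struct nmi_frame copy;     /* frame actually used by the iret     */
    };

    void model_nmi(struct nmi_scratch *s, struct nmi_frame *hw,
                   int came_from_nmi_stack, unsigned long repeat_nmi_ip)
    {
            if (s->executing || came_from_nmi_stack) {
                    /* Nested NMI: redirect the first NMI's exit to repeat_nmi */
                    s->copy.rip = repeat_nmi_ip;
                    return;                     /* and iret straight back      */
            }

            /* First NMI: mark the stack and keep two copies of the frame */
            s->executing = 1;
            s->saved = *hw;
            s->copy  = *hw;

            /*
             * ... run do_nmi(); on exit clear s->executing and iret via
             * s->copy, which a nested NMI may have pointed at repeat_nmi;
             * repeat_nmi then restores s->copy from s->saved and reruns.
             */
    }

The key point of the design is that a nested NMI never runs the handler itself; it only rewrites the copied return frame so that the first NMI, on its final iret, lands in repeat_nmi and runs the handler again.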
arch/x86/kernel/head_64.S
arch/x86/kernel/nmi.c
... | ... | @@ -405,9 +405,108 @@ |
405 | 405 | unknown_nmi_error(reason, regs); |
406 | 406 | } |
407 | 407 | |
408 | +/* | |
409 | + * NMIs can hit breakpoints, which will cause the CPU to lose its | 
410 | + * NMI context when the breakpoint does an iret. | 
411 | + */ | |
412 | +#ifdef CONFIG_X86_32 | |
413 | +/* | |
414 | + * For i386, NMIs use the same stack as the kernel, and we can | |
415 | + * add a workaround to the iret problem in C. Simply have 3 states | |
416 | + * the NMI can be in. | |
417 | + * | |
418 | + * 1) not running | |
419 | + * 2) executing | |
420 | + * 3) latched | |
421 | + * | |
422 | + * When no NMI is in progress, it is in the "not running" state. | |
423 | + * When an NMI comes in, it goes into the "executing" state. | |
424 | + * Normally, if another NMI is triggered, it does not interrupt | |
425 | + * the running NMI and the HW will simply latch it so that when | |
426 | + * the first NMI finishes, it will restart the second NMI. | |
427 | + * (Note, the latch is binary, thus multiple NMIs triggering, | |
428 | + * when one is running, are ignored. Only one NMI is restarted.) | |
429 | + * | |
430 | + * If an NMI hits a breakpoint that executes an iret, another | |
431 | + * NMI can preempt it. We do not want to allow this new NMI | |
432 | + * to run, but we want to execute it when the first one finishes. | |
433 | + * We set the state to "latched", and the first NMI will perform | |
434 | + * a cmpxchg on the state, and if it doesn't successfully | 
435 | + * reset the state to "not running" it will restart the next | |
436 | + * NMI. | |
437 | + */ | |
438 | +enum nmi_states { | |
439 | + NMI_NOT_RUNNING, | |
440 | + NMI_EXECUTING, | |
441 | + NMI_LATCHED, | |
442 | +}; | |
443 | +static DEFINE_PER_CPU(enum nmi_states, nmi_state); | |
444 | + | |
445 | +#define nmi_nesting_preprocess(regs) \ | |
446 | + do { \ | |
447 | + if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ | |
448 | + __get_cpu_var(nmi_state) = NMI_LATCHED; \ | |
449 | + return; \ | |
450 | + } \ | |
451 | + nmi_restart: \ | |
452 | + __get_cpu_var(nmi_state) = NMI_EXECUTING; \ | |
453 | + } while (0) | |
454 | + | |
455 | +#define nmi_nesting_postprocess() \ | |
456 | + do { \ | |
457 | + if (cmpxchg(&__get_cpu_var(nmi_state), \ | |
458 | + NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ | |
459 | + goto nmi_restart; \ | |
460 | + } while (0) | |
461 | +#else /* x86_64 */ | |
462 | +/* | |
463 | + * In x86_64 things are a bit more difficult. This has the same problem | |
464 | + * where an NMI hitting a breakpoint that calls iret will remove the | |
465 | + * NMI context, allowing a nested NMI to enter. What makes this more | |
466 | + * difficult is that both NMIs and breakpoints have their own stack. | |
467 | + * When a new NMI or breakpoint is executed, the stack is set to a fixed | |
468 | + * point. If an NMI is nested, it will have its stack set at that same | |
469 | + * fixed address that the first NMI had, and will start corrupting the | |
470 | + * stack. This is handled in entry_64.S, but the same problem exists with | |
471 | + * the breakpoint stack. | |
472 | + * | |
473 | + * If a breakpoint is being processed, and the debug stack is being used, | |
474 | + * if an NMI comes in and also hits a breakpoint, the stack pointer | |
475 | + * will be set to the same fixed address as the breakpoint that was | |
476 | + * interrupted, causing that stack to be corrupted. To handle this case, | |
477 | + * check if the stack that was interrupted is the debug stack, and if | |
478 | + * so, change the IDT so that new breakpoints will use the current stack | |
479 | + * and not switch to the fixed address. On return of the NMI, switch back | |
480 | + * to the original IDT. | |
481 | + */ | |
482 | +static DEFINE_PER_CPU(int, update_debug_stack); | |
483 | + | |
484 | +static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |
485 | +{ | |
486 | + /* | |
487 | + * If we interrupted a breakpoint, it is possible that | |
488 | + * the nmi handler will have breakpoints too. We need to | |
489 | + * change the IDT such that breakpoints that happen here | |
490 | + * continue to use the NMI stack. | |
491 | + */ | |
492 | + if (unlikely(is_debug_stack(regs->sp))) { | |
493 | + debug_stack_set_zero(); | |
494 | + __get_cpu_var(update_debug_stack) = 1; | |
495 | + } | |
496 | +} | |
497 | + | |
498 | +static inline void nmi_nesting_postprocess(void) | |
499 | +{ | |
500 | + if (unlikely(__get_cpu_var(update_debug_stack))) | |
501 | + debug_stack_reset(); | |
502 | +} | |
503 | +#endif | |
504 | + | |
408 | 505 | dotraplinkage notrace __kprobes void |
409 | 506 | do_nmi(struct pt_regs *regs, long error_code) |
410 | 507 | { |
508 | + nmi_nesting_preprocess(regs); | |
509 | + | |
411 | 510 | nmi_enter(); |
412 | 511 | |
413 | 512 | inc_irq_stat(__nmi_count); |
... | ... | @@ -416,6 +515,9 @@ |
416 | 515 | default_do_nmi(regs); |
417 | 516 | |
418 | 517 | nmi_exit(); |
518 | + | |
519 | + /* On i386, may loop back to preprocess */ | |
520 | + nmi_nesting_postprocess(); | |
419 | 521 | } |
420 | 522 | |
421 | 523 | void stop_nmi(void) |
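The i386 comment above maps onto a small state machine. Below is a stand-alone user-space model of it (my own sketch, plain C with a GCC __sync builtin standing in for the kernel's per-CPU cmpxchg(); it is not kernel code):

    #include <stdio.h>

    enum nmi_states { NMI_NOT_RUNNING, NMI_EXECUTING, NMI_LATCHED };
    static int nmi_state = NMI_NOT_RUNNING;

    static void handle_one_nmi(void)        /* stands in for default_do_nmi() */
    {
            printf("NMI handled\n");
    }

    static void model_do_nmi(void)
    {
            if (nmi_state != NMI_NOT_RUNNING) {
                    /* an NMI is already running: latch exactly one more */
                    nmi_state = NMI_LATCHED;
                    return;
            }
    nmi_restart:
            nmi_state = NMI_EXECUTING;
            handle_one_nmi();
            /* if a nested NMI latched itself meanwhile, run it now */
            if (__sync_val_compare_and_swap(&nmi_state, NMI_EXECUTING,
                                            NMI_NOT_RUNNING) != NMI_EXECUTING)
                    goto nmi_restart;
    }

    int main(void)
    {
            model_do_nmi();
            return 0;
    }

As the comment notes, the latch is binary: any number of NMIs arriving while one executes collapse into a single restart.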
arch/x86/kernel/traps.c
... | ... | @@ -311,9 +311,15 @@ |
311 | 311 | == NOTIFY_STOP) |
312 | 312 | return; |
313 | 313 | |
314 | + /* | |
315 | + * Let others (NMI) know that the debug stack is in use | |
316 | + * as we may switch to the interrupt stack. | |
317 | + */ | |
318 | + debug_stack_usage_inc(); | |
314 | 319 | preempt_conditional_sti(regs); |
315 | 320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
316 | 321 | preempt_conditional_cli(regs); |
322 | + debug_stack_usage_dec(); | |
317 | 323 | } |
318 | 324 | |
319 | 325 | #ifdef CONFIG_X86_64 |
... | ... | @@ -406,6 +412,12 @@ |
406 | 412 | SIGTRAP) == NOTIFY_STOP) |
407 | 413 | return; |
408 | 414 | |
415 | + /* | |
416 | + * Let others (NMI) know that the debug stack is in use | |
417 | + * as we may switch to the interrupt stack. | |
418 | + */ | |
419 | + debug_stack_usage_inc(); | |
420 | + | |
409 | 421 | /* It's safe to allow irq's after DR6 has been saved */ |
410 | 422 | preempt_conditional_sti(regs); |
411 | 423 | |
... | ... | @@ -413,6 +425,7 @@ |
413 | 425 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
414 | 426 | error_code, 1); |
415 | 427 | preempt_conditional_cli(regs); |
428 | + debug_stack_usage_dec(); | |
416 | 429 | return; |
417 | 430 | } |
418 | 431 | |
... | ... | @@ -432,6 +445,7 @@ |
432 | 445 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) |
433 | 446 | send_sigtrap(tsk, regs, error_code, si_code); |
434 | 447 | preempt_conditional_cli(regs); |
448 | + debug_stack_usage_dec(); | |
435 | 449 | |
436 | 450 | return; |
437 | 451 | } |
... | ... | @@ -718,5 +732,11 @@ |
718 | 732 | cpu_init(); |
719 | 733 | |
720 | 734 | x86_init.irqs.trap_init(); |
735 | + | |
736 | +#ifdef CONFIG_X86_64 | |
737 | + memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16); | |
738 | + set_nmi_gate(1, &debug); | |
739 | + set_nmi_gate(3, &int3); | |
740 | +#endif | |
721 | 741 | } |
include/linux/compiler-gcc.h
... | ... | @@ -50,6 +50,11 @@ |
50 | 50 | # define inline inline __attribute__((always_inline)) |
51 | 51 | # define __inline__ __inline__ __attribute__((always_inline)) |
52 | 52 | # define __inline __inline __attribute__((always_inline)) |
53 | +#else | |
54 | +/* A lot of inline functions can cause havoc with function tracing */ | |
55 | +# define inline inline notrace | |
56 | +# define __inline__ __inline__ notrace | |
57 | +# define __inline __inline notrace | |
53 | 58 | #endif |
54 | 59 | |
55 | 60 | #define __deprecated __attribute__((deprecated)) |
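The effect of the new #else branch is easiest to see on an ordinary header helper. A hypothetical example (mine, not from the patch), assuming the kernel is configured so that gcc may ignore the inline hint:

    /*
     * "inline" now expands to "inline notrace" in this configuration,
     * so if gcc emits an out-of-line copy of this helper it gets no
     * mcount call and never appears in the function tracer.
     */
    static inline int example_twice(int x)
    {
            return 2 * x;
    }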
include/linux/ftrace.h
... | ... | @@ -133,6 +133,8 @@ |
133 | 133 | int ftrace_arch_code_modify_prepare(void); |
134 | 134 | int ftrace_arch_code_modify_post_process(void); |
135 | 135 | |
136 | +void ftrace_bug(int err, unsigned long ip); | |
137 | + | |
136 | 138 | struct seq_file; |
137 | 139 | |
138 | 140 | struct ftrace_probe_ops { |
... | ... | @@ -161,7 +163,6 @@ |
161 | 163 | |
162 | 164 | enum { |
163 | 165 | FTRACE_FL_ENABLED = (1 << 30), |
164 | - FTRACE_FL_FREE = (1 << 31), | |
165 | 166 | }; |
166 | 167 | |
167 | 168 | #define FTRACE_FL_MASK (0x3UL << 30) |
... | ... | @@ -172,10 +173,7 @@ |
172 | 173 | unsigned long ip; /* address of mcount call-site */ |
173 | 174 | struct dyn_ftrace *freelist; |
174 | 175 | }; |
175 | - union { | |
176 | - unsigned long flags; | |
177 | - struct dyn_ftrace *newlist; | |
178 | - }; | |
176 | + unsigned long flags; | |
179 | 177 | struct dyn_arch_ftrace arch; |
180 | 178 | }; |
181 | 179 | |
... | ... | @@ -190,6 +188,56 @@ |
190 | 188 | int register_ftrace_command(struct ftrace_func_command *cmd); |
191 | 189 | int unregister_ftrace_command(struct ftrace_func_command *cmd); |
192 | 190 | |
191 | +enum { | |
192 | + FTRACE_UPDATE_CALLS = (1 << 0), | |
193 | + FTRACE_DISABLE_CALLS = (1 << 1), | |
194 | + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | |
195 | + FTRACE_START_FUNC_RET = (1 << 3), | |
196 | + FTRACE_STOP_FUNC_RET = (1 << 4), | |
197 | +}; | |
198 | + | |
199 | +enum { | |
200 | + FTRACE_UPDATE_IGNORE, | |
201 | + FTRACE_UPDATE_MAKE_CALL, | |
202 | + FTRACE_UPDATE_MAKE_NOP, | |
203 | +}; | |
204 | + | |
205 | +enum { | |
206 | + FTRACE_ITER_FILTER = (1 << 0), | |
207 | + FTRACE_ITER_NOTRACE = (1 << 1), | |
208 | + FTRACE_ITER_PRINTALL = (1 << 2), | |
209 | + FTRACE_ITER_DO_HASH = (1 << 3), | |
210 | + FTRACE_ITER_HASH = (1 << 4), | |
211 | + FTRACE_ITER_ENABLED = (1 << 5), | |
212 | +}; | |
213 | + | |
214 | +void arch_ftrace_update_code(int command); | |
215 | + | |
216 | +struct ftrace_rec_iter; | |
217 | + | |
218 | +struct ftrace_rec_iter *ftrace_rec_iter_start(void); | |
219 | +struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter); | |
220 | +struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter); | |
221 | + | |
222 | +int ftrace_update_record(struct dyn_ftrace *rec, int enable); | |
223 | +int ftrace_test_record(struct dyn_ftrace *rec, int enable); | |
224 | +void ftrace_run_stop_machine(int command); | |
225 | +int ftrace_location(unsigned long ip); | |
226 | + | |
227 | +extern ftrace_func_t ftrace_trace_function; | |
228 | + | |
229 | +int ftrace_regex_open(struct ftrace_ops *ops, int flag, | |
230 | + struct inode *inode, struct file *file); | |
231 | +ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, | |
232 | + size_t cnt, loff_t *ppos); | |
233 | +ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, | |
234 | + size_t cnt, loff_t *ppos); | |
235 | +loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin); | |
236 | +int ftrace_regex_release(struct inode *inode, struct file *file); | |
237 | + | |
238 | +void __init | |
239 | +ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable); | |
240 | + | |
193 | 241 | /* defined in arch */ |
194 | 242 | extern int ftrace_ip_converted(unsigned long ip); |
195 | 243 | extern int ftrace_dyn_arch_init(void *data); |
... | ... | @@ -284,6 +332,25 @@ |
284 | 332 | { |
285 | 333 | return 0; |
286 | 334 | } |
335 | + | |
336 | +/* | |
337 | + * Again users of functions that have ftrace_ops may not | |
338 | + * have them defined when ftrace is not enabled, but these | |
339 | + * functions may still be called. Use a macro instead of inline. | |
340 | + */ | |
341 | +#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) | |
342 | +#define ftrace_set_early_filter(ops, buf, enable) do { } while (0) | |
343 | + | |
344 | +static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf, | |
345 | + size_t cnt, loff_t *ppos) { return -ENODEV; } | |
346 | +static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf, | |
347 | + size_t cnt, loff_t *ppos) { return -ENODEV; } | |
348 | +static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin) | |
349 | +{ | |
350 | + return -ENODEV; | |
351 | +} | |
352 | +static inline int | |
353 | +ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; } | |
287 | 354 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
288 | 355 | |
289 | 356 | /* totally disable ftrace - can not re-enable after this */ |
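The new declarations above (the record iterator, ftrace_update_record()/ftrace_test_record(), ftrace_location() and the now-exported ftrace_bug()) are aimed at arch code. A hedged sketch of how an architecture might drive them from arch_ftrace_update_code() follows; this is my illustration of the intended use, not code taken from this series:

    #include <linux/ftrace.h>

    /*
     * Sketch only: walk every mcount record and let the core decide
     * whether each call site should become a call or a nop.
     */
    void arch_ftrace_update_code(int command)
    {
            struct ftrace_rec_iter *iter;
            struct dyn_ftrace *rec;
            int enable = command & FTRACE_UPDATE_CALLS;

            for (iter = ftrace_rec_iter_start(); iter;
                 iter = ftrace_rec_iter_next(iter)) {
                    rec = ftrace_rec_iter_record(iter);

                    switch (ftrace_update_record(rec, enable)) {
                    case FTRACE_UPDATE_IGNORE:
                            break;          /* nothing changed              */
                    case FTRACE_UPDATE_MAKE_CALL:
                            /* arch-specific: patch in the call to ftrace   */
                            break;
                    case FTRACE_UPDATE_MAKE_NOP:
                            /* arch-specific: patch the site back to a nop  */
                            break;
                    }
                    /*
                     * on a patching failure the arch would call
                     * ftrace_bug(err, rec->ip) to report and shut down
                     */
            }
    }

The three return values mirror the generic __ftrace_replace_code() added in kernel/trace/ftrace.c below.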
kernel/trace/ftrace.c
... | ... | @@ -22,11 +22,13 @@ |
22 | 22 | #include <linux/hardirq.h> |
23 | 23 | #include <linux/kthread.h> |
24 | 24 | #include <linux/uaccess.h> |
25 | +#include <linux/bsearch.h> | |
25 | 26 | #include <linux/module.h> |
26 | 27 | #include <linux/ftrace.h> |
27 | 28 | #include <linux/sysctl.h> |
28 | 29 | #include <linux/slab.h> |
29 | 30 | #include <linux/ctype.h> |
31 | +#include <linux/sort.h> | |
30 | 32 | #include <linux/list.h> |
31 | 33 | #include <linux/hash.h> |
32 | 34 | #include <linux/rcupdate.h> |
... | ... | @@ -947,13 +949,6 @@ |
947 | 949 | struct rcu_head rcu; |
948 | 950 | }; |
949 | 951 | |
950 | -enum { | |
951 | - FTRACE_ENABLE_CALLS = (1 << 0), | |
952 | - FTRACE_DISABLE_CALLS = (1 << 1), | |
953 | - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), | |
954 | - FTRACE_START_FUNC_RET = (1 << 3), | |
955 | - FTRACE_STOP_FUNC_RET = (1 << 4), | |
956 | -}; | |
957 | 952 | struct ftrace_func_entry { |
958 | 953 | struct hlist_node hlist; |
959 | 954 | unsigned long ip; |
960 | 955 | |
961 | 956 | |
962 | 957 | |
963 | 958 | |
964 | 959 | |
... | ... | @@ -984,26 +979,30 @@ |
984 | 979 | .filter_hash = EMPTY_HASH, |
985 | 980 | }; |
986 | 981 | |
987 | -static struct dyn_ftrace *ftrace_new_addrs; | |
988 | - | |
989 | 982 | static DEFINE_MUTEX(ftrace_regex_lock); |
990 | 983 | |
991 | 984 | struct ftrace_page { |
992 | 985 | struct ftrace_page *next; |
986 | + struct dyn_ftrace *records; | |
993 | 987 | int index; |
994 | - struct dyn_ftrace records[]; | |
988 | + int size; | |
995 | 989 | }; |
996 | 990 | |
997 | -#define ENTRIES_PER_PAGE \ | |
998 | - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) | |
991 | +static struct ftrace_page *ftrace_new_pgs; | |
999 | 992 | |
993 | +#define ENTRY_SIZE sizeof(struct dyn_ftrace) | |
994 | +#define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE) | |
995 | + | |
1000 | 996 | /* estimate from running different kernels */ |
1001 | 997 | #define NR_TO_INIT 10000 |
1002 | 998 | |
1003 | 999 | static struct ftrace_page *ftrace_pages_start; |
1004 | 1000 | static struct ftrace_page *ftrace_pages; |
1005 | 1001 | |
1006 | -static struct dyn_ftrace *ftrace_free_records; | |
1002 | +static bool ftrace_hash_empty(struct ftrace_hash *hash) | |
1003 | +{ | |
1004 | + return !hash || !hash->count; | |
1005 | +} | |
1007 | 1006 | |
1008 | 1007 | static struct ftrace_func_entry * |
1009 | 1008 | ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) |
... | ... | @@ -1013,7 +1012,7 @@ |
1013 | 1012 | struct hlist_head *hhd; |
1014 | 1013 | struct hlist_node *n; |
1015 | 1014 | |
1016 | - if (!hash->count) | |
1015 | + if (ftrace_hash_empty(hash)) | |
1017 | 1016 | return NULL; |
1018 | 1017 | |
1019 | 1018 | if (hash->size_bits > 0) |
... | ... | @@ -1157,7 +1156,7 @@ |
1157 | 1156 | return NULL; |
1158 | 1157 | |
1159 | 1158 | /* Empty hash? */ |
1160 | - if (!hash || !hash->count) | |
1159 | + if (ftrace_hash_empty(hash)) | |
1161 | 1160 | return new_hash; |
1162 | 1161 | |
1163 | 1162 | size = 1 << hash->size_bits; |
1164 | 1163 | |
... | ... | @@ -1282,9 +1281,9 @@ |
1282 | 1281 | filter_hash = rcu_dereference_raw(ops->filter_hash); |
1283 | 1282 | notrace_hash = rcu_dereference_raw(ops->notrace_hash); |
1284 | 1283 | |
1285 | - if ((!filter_hash || !filter_hash->count || | |
1284 | + if ((ftrace_hash_empty(filter_hash) || | |
1286 | 1285 | ftrace_lookup_ip(filter_hash, ip)) && |
1287 | - (!notrace_hash || !notrace_hash->count || | |
1286 | + (ftrace_hash_empty(notrace_hash) || | |
1288 | 1287 | !ftrace_lookup_ip(notrace_hash, ip))) |
1289 | 1288 | ret = 1; |
1290 | 1289 | else |
... | ... | @@ -1307,6 +1306,47 @@ |
1307 | 1306 | } \ |
1308 | 1307 | } |
1309 | 1308 | |
1309 | + | |
1310 | +static int ftrace_cmp_recs(const void *a, const void *b) | |
1311 | +{ | |
1312 | + const struct dyn_ftrace *reca = a; | |
1313 | + const struct dyn_ftrace *recb = b; | |
1314 | + | |
1315 | + if (reca->ip > recb->ip) | |
1316 | + return 1; | |
1317 | + if (reca->ip < recb->ip) | |
1318 | + return -1; | |
1319 | + return 0; | |
1320 | +} | |
1321 | + | |
1322 | +/** | |
1323 | + * ftrace_location - return true if the ip given is a traced location | 
1324 | + * @ip: the instruction pointer to check | |
1325 | + * | |
1326 | + * Returns 1 if @ip given is a pointer to a ftrace location. | |
1327 | + * That is, the instruction that is either a NOP or call to | |
1328 | + * the function tracer. It checks the ftrace internal tables to | |
1329 | + * determine if the address belongs or not. | |
1330 | + */ | |
1331 | +int ftrace_location(unsigned long ip) | |
1332 | +{ | |
1333 | + struct ftrace_page *pg; | |
1334 | + struct dyn_ftrace *rec; | |
1335 | + struct dyn_ftrace key; | |
1336 | + | |
1337 | + key.ip = ip; | |
1338 | + | |
1339 | + for (pg = ftrace_pages_start; pg; pg = pg->next) { | |
1340 | + rec = bsearch(&key, pg->records, pg->index, | |
1341 | + sizeof(struct dyn_ftrace), | |
1342 | + ftrace_cmp_recs); | |
1343 | + if (rec) | |
1344 | + return 1; | |
1345 | + } | |
1346 | + | |
1347 | + return 0; | |
1348 | +} | |
1349 | + | |
1310 | 1350 | static void __ftrace_hash_rec_update(struct ftrace_ops *ops, |
1311 | 1351 | int filter_hash, |
1312 | 1352 | bool inc) |
... | ... | @@ -1336,7 +1376,7 @@ |
1336 | 1376 | if (filter_hash) { |
1337 | 1377 | hash = ops->filter_hash; |
1338 | 1378 | other_hash = ops->notrace_hash; |
1339 | - if (!hash || !hash->count) | |
1379 | + if (ftrace_hash_empty(hash)) | |
1340 | 1380 | all = 1; |
1341 | 1381 | } else { |
1342 | 1382 | inc = !inc; |
... | ... | @@ -1346,7 +1386,7 @@ |
1346 | 1386 | * If the notrace hash has no items, |
1347 | 1387 | * then there's nothing to do. |
1348 | 1388 | */ |
1349 | - if (hash && !hash->count) | |
1389 | + if (ftrace_hash_empty(hash)) | |
1350 | 1390 | return; |
1351 | 1391 | } |
1352 | 1392 | |
... | ... | @@ -1363,8 +1403,8 @@ |
1363 | 1403 | if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip)) |
1364 | 1404 | match = 1; |
1365 | 1405 | } else { |
1366 | - in_hash = hash && !!ftrace_lookup_ip(hash, rec->ip); | |
1367 | - in_other_hash = other_hash && !!ftrace_lookup_ip(other_hash, rec->ip); | |
1406 | + in_hash = !!ftrace_lookup_ip(hash, rec->ip); | |
1407 | + in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip); | |
1368 | 1408 | |
1369 | 1409 | /* |
1370 | 1410 | * |
... | ... | @@ -1372,7 +1412,7 @@ |
1372 | 1412 | if (filter_hash && in_hash && !in_other_hash) |
1373 | 1413 | match = 1; |
1374 | 1414 | else if (!filter_hash && in_hash && |
1375 | - (in_other_hash || !other_hash->count)) | |
1415 | + (in_other_hash || ftrace_hash_empty(other_hash))) | |
1376 | 1416 | match = 1; |
1377 | 1417 | } |
1378 | 1418 | if (!match) |
1379 | 1419 | |
1380 | 1420 | |
... | ... | @@ -1406,40 +1446,12 @@ |
1406 | 1446 | __ftrace_hash_rec_update(ops, filter_hash, 1); |
1407 | 1447 | } |
1408 | 1448 | |
1409 | -static void ftrace_free_rec(struct dyn_ftrace *rec) | |
1410 | -{ | |
1411 | - rec->freelist = ftrace_free_records; | |
1412 | - ftrace_free_records = rec; | |
1413 | - rec->flags |= FTRACE_FL_FREE; | |
1414 | -} | |
1415 | - | |
1416 | 1449 | static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) |
1417 | 1450 | { |
1418 | - struct dyn_ftrace *rec; | |
1419 | - | |
1420 | - /* First check for freed records */ | |
1421 | - if (ftrace_free_records) { | |
1422 | - rec = ftrace_free_records; | |
1423 | - | |
1424 | - if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { | |
1425 | - FTRACE_WARN_ON_ONCE(1); | |
1426 | - ftrace_free_records = NULL; | |
1451 | + if (ftrace_pages->index == ftrace_pages->size) { | |
1452 | + /* We should have allocated enough */ | |
1453 | + if (WARN_ON(!ftrace_pages->next)) | |
1427 | 1454 | return NULL; |
1428 | - } | |
1429 | - | |
1430 | - ftrace_free_records = rec->freelist; | |
1431 | - memset(rec, 0, sizeof(*rec)); | |
1432 | - return rec; | |
1433 | - } | |
1434 | - | |
1435 | - if (ftrace_pages->index == ENTRIES_PER_PAGE) { | |
1436 | - if (!ftrace_pages->next) { | |
1437 | - /* allocate another page */ | |
1438 | - ftrace_pages->next = | |
1439 | - (void *)get_zeroed_page(GFP_KERNEL); | |
1440 | - if (!ftrace_pages->next) | |
1441 | - return NULL; | |
1442 | - } | |
1443 | 1455 | ftrace_pages = ftrace_pages->next; |
1444 | 1456 | } |
1445 | 1457 | |
... | ... | @@ -1459,8 +1471,6 @@ |
1459 | 1471 | return NULL; |
1460 | 1472 | |
1461 | 1473 | rec->ip = ip; |
1462 | - rec->newlist = ftrace_new_addrs; | |
1463 | - ftrace_new_addrs = rec; | |
1464 | 1474 | |
1465 | 1475 | return rec; |
1466 | 1476 | } |
... | ... | @@ -1475,7 +1485,19 @@ |
1475 | 1485 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); |
1476 | 1486 | } |
1477 | 1487 | |
1478 | -static void ftrace_bug(int failed, unsigned long ip) | |
1488 | +/** | |
1489 | + * ftrace_bug - report and shutdown function tracer | |
1490 | + * @failed: The failed type (EFAULT, EINVAL, EPERM) | |
1491 | + * @ip: The address that failed | |
1492 | + * | |
1493 | + * The arch code that enables or disables the function tracing | |
1494 | + * can call ftrace_bug() when it has detected a problem in | |
1495 | + * modifying the code. @failed should be one of either: | |
1496 | + * EFAULT - if the problem happens on reading the @ip address | |
1497 | + * EINVAL - if what is read at @ip is not what was expected | |
1498 | + * EPERM - if the problem happens on writing to the @ip address | 
1499 | + */ | |
1500 | +void ftrace_bug(int failed, unsigned long ip) | |
1479 | 1501 | { |
1480 | 1502 | switch (failed) { |
1481 | 1503 | case -EFAULT: |
1482 | 1504 | |
1483 | 1505 | |
1484 | 1506 | |
1485 | 1507 | |
... | ... | @@ -1517,24 +1539,19 @@ |
1517 | 1539 | return 0; |
1518 | 1540 | } |
1519 | 1541 | |
1520 | - | |
1521 | -static int | |
1522 | -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) | |
1542 | +static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) | |
1523 | 1543 | { |
1524 | - unsigned long ftrace_addr; | |
1525 | 1544 | unsigned long flag = 0UL; |
1526 | 1545 | |
1527 | - ftrace_addr = (unsigned long)FTRACE_ADDR; | |
1528 | - | |
1529 | 1546 | /* |
1530 | - * If we are enabling tracing: | |
1547 | + * If we are updating calls: | |
1531 | 1548 | * |
1532 | 1549 | * If the record has a ref count, then we need to enable it |
1533 | 1550 | * because someone is using it. |
1534 | 1551 | * |
1535 | 1552 | * Otherwise we make sure its disabled. |
1536 | 1553 | * |
1537 | - * If we are disabling tracing, then disable all records that | |
1554 | + * If we are disabling calls, then disable all records that | |
1538 | 1555 | * are enabled. |
1539 | 1556 | */ |
1540 | 1557 | if (enable && (rec->flags & ~FTRACE_FL_MASK)) |
1541 | 1558 | |
1542 | 1559 | |
1543 | 1560 | |
1544 | 1561 | |
... | ... | @@ -1542,18 +1559,72 @@ |
1542 | 1559 | |
1543 | 1560 | /* If the state of this record hasn't changed, then do nothing */ |
1544 | 1561 | if ((rec->flags & FTRACE_FL_ENABLED) == flag) |
1545 | - return 0; | |
1562 | + return FTRACE_UPDATE_IGNORE; | |
1546 | 1563 | |
1547 | 1564 | if (flag) { |
1548 | - rec->flags |= FTRACE_FL_ENABLED; | |
1565 | + if (update) | |
1566 | + rec->flags |= FTRACE_FL_ENABLED; | |
1567 | + return FTRACE_UPDATE_MAKE_CALL; | |
1568 | + } | |
1569 | + | |
1570 | + if (update) | |
1571 | + rec->flags &= ~FTRACE_FL_ENABLED; | |
1572 | + | |
1573 | + return FTRACE_UPDATE_MAKE_NOP; | |
1574 | +} | |
1575 | + | |
1576 | +/** | |
1577 | + * ftrace_update_record, set a record that now is tracing or not | |
1578 | + * @rec: the record to update | |
1579 | + * @enable: set to 1 if the record is tracing, zero to force disable | |
1580 | + * | |
1581 | + * The records that represent all functions that can be traced need | |
1582 | + * to be updated when tracing has been enabled. | |
1583 | + */ | |
1584 | +int ftrace_update_record(struct dyn_ftrace *rec, int enable) | |
1585 | +{ | |
1586 | + return ftrace_check_record(rec, enable, 1); | |
1587 | +} | |
1588 | + | |
1589 | +/** | |
1590 | + * ftrace_test_record, check if the record has been enabled or not | |
1591 | + * @rec: the record to test | |
1592 | + * @enable: set to 1 to check if enabled, 0 if it is disabled | |
1593 | + * | |
1594 | + * The arch code may need to test if a record is already set to | |
1595 | + * tracing to determine how to modify the function code that it | |
1596 | + * represents. | |
1597 | + */ | |
1598 | +int ftrace_test_record(struct dyn_ftrace *rec, int enable) | |
1599 | +{ | |
1600 | + return ftrace_check_record(rec, enable, 0); | |
1601 | +} | |
1602 | + | |
1603 | +static int | |
1604 | +__ftrace_replace_code(struct dyn_ftrace *rec, int enable) | |
1605 | +{ | |
1606 | + unsigned long ftrace_addr; | |
1607 | + int ret; | |
1608 | + | |
1609 | + ftrace_addr = (unsigned long)FTRACE_ADDR; | |
1610 | + | |
1611 | + ret = ftrace_update_record(rec, enable); | |
1612 | + | |
1613 | + switch (ret) { | |
1614 | + case FTRACE_UPDATE_IGNORE: | |
1615 | + return 0; | |
1616 | + | |
1617 | + case FTRACE_UPDATE_MAKE_CALL: | |
1549 | 1618 | return ftrace_make_call(rec, ftrace_addr); |
1619 | + | |
1620 | + case FTRACE_UPDATE_MAKE_NOP: | |
1621 | + return ftrace_make_nop(NULL, rec, ftrace_addr); | |
1550 | 1622 | } |
1551 | 1623 | |
1552 | - rec->flags &= ~FTRACE_FL_ENABLED; | |
1553 | - return ftrace_make_nop(NULL, rec, ftrace_addr); | |
1624 | + return -1; /* unknown ftrace bug */ | 
1554 | 1625 | } |
1555 | 1626 | |
1556 | -static void ftrace_replace_code(int enable) | |
1627 | +static void ftrace_replace_code(int update) | |
1557 | 1628 | { |
1558 | 1629 | struct dyn_ftrace *rec; |
1559 | 1630 | struct ftrace_page *pg; |
... | ... | @@ -1563,11 +1634,7 @@ |
1563 | 1634 | return; |
1564 | 1635 | |
1565 | 1636 | do_for_each_ftrace_rec(pg, rec) { |
1566 | - /* Skip over free records */ | |
1567 | - if (rec->flags & FTRACE_FL_FREE) | |
1568 | - continue; | |
1569 | - | |
1570 | - failed = __ftrace_replace_code(rec, enable); | |
1637 | + failed = __ftrace_replace_code(rec, update); | |
1571 | 1638 | if (failed) { |
1572 |