Commit 690a973f48b6ba2954465992c08e65059c8374fe

Authored by Jan Beulich
Committed by Andi Kleen
1 parent cdfce1f571

[PATCH] x86-64: Speed up dwarf2 unwinder

This changes the dwarf2 unwinder to do a binary search for FDEs
instead of a linear walk. The linker is unfortunately not
able to build a proper lookup table at link time, so the kernel instead creates
one at runtime as soon as the bootmem allocator is usable (so you'll continue
using the linear lookup for the first [hopefully] few calls).
The code should be ready to utilize a build-time created table once
a fixed linker becomes available.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>

Showing 5 changed files with 299 additions and 39 deletions Side-by-side Diff

... ... @@ -499,6 +499,7 @@
499 499  
500 500 ifdef CONFIG_UNWIND_INFO
501 501 CFLAGS += -fasynchronous-unwind-tables
  502 +LDFLAGS_vmlinux += --eh-frame-hdr
502 503 endif
503 504  
504 505 ifdef CONFIG_DEBUG_INFO
include/asm-generic/vmlinux.lds.h
... ... @@ -125,6 +125,10 @@
125 125 *(__param) \
126 126 VMLINUX_SYMBOL(__stop___param) = .; \
127 127 } \
  128 + \
  129 + /* Unwind data binary search table */ \
  130 + EH_FRAME_HDR \
  131 + \
128 132 __end_rodata = .; \
129 133 . = ALIGN(4096);
130 134  
... ... @@ -156,6 +160,18 @@
156 160 VMLINUX_SYMBOL(__kprobes_text_start) = .; \
157 161 *(.kprobes.text) \
158 162 VMLINUX_SYMBOL(__kprobes_text_end) = .;
  163 +
  164 +#ifdef CONFIG_STACK_UNWIND
  165 + /* Unwind data binary search table */
  166 +#define EH_FRAME_HDR \
  167 + .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \
  168 + VMLINUX_SYMBOL(__start_unwind_hdr) = .; \
  169 + *(.eh_frame_hdr) \
  170 + VMLINUX_SYMBOL(__end_unwind_hdr) = .; \
  171 + }
  172 +#else
  173 +#define EH_FRAME_HDR
  174 +#endif
159 175  
160 176 /* DWARF debug sections.
161 177 Symbols in the DWARF debugging sections are relative to
include/linux/unwind.h
... ... @@ -26,6 +26,7 @@
26 26 * Initialize unwind support.
27 27 */
28 28 extern void unwind_init(void);
  29 +extern void unwind_setup(void);
29 30  
30 31 #ifdef CONFIG_MODULES
31 32  
... ... @@ -73,6 +74,7 @@
73 74 struct unwind_frame_info {};
74 75  
75 76 static inline void unwind_init(void) {}
  77 +static inline void unwind_setup(void) {}
76 78  
77 79 #ifdef CONFIG_MODULES
78 80  
... ... @@ -503,6 +503,7 @@
503 503 printk(KERN_NOTICE);
504 504 printk(linux_banner);
505 505 setup_arch(&command_line);
  506 + unwind_setup();
506 507 setup_per_cpu_areas();
507 508 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
508 509  
... ... @@ -11,13 +11,15 @@
11 11  
12 12 #include <linux/unwind.h>
13 13 #include <linux/module.h>
14   -#include <linux/delay.h>
  14 +#include <linux/bootmem.h>
  15 +#include <linux/sort.h>
15 16 #include <linux/stop_machine.h>
16 17 #include <asm/sections.h>
17 18 #include <asm/uaccess.h>
18 19 #include <asm/unaligned.h>
19 20  
20 21 extern char __start_unwind[], __end_unwind[];
  22 +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
21 23  
22 24 #define MAX_STACK_DEPTH 8
23 25  
... ... @@ -100,6 +102,8 @@
100 102 } core, init;
101 103 const void *address;
102 104 unsigned long size;
  105 + const unsigned char *header;
  106 + unsigned long hdrsz;
103 107 struct unwind_table *link;
104 108 const char *name;
105 109 } root_table;
... ... @@ -145,6 +149,10 @@
145 149 return table;
146 150 }
147 151  
  152 +static unsigned long read_pointer(const u8 **pLoc,
  153 + const void *end,
  154 + signed ptrType);
  155 +
148 156 static void init_unwind_table(struct unwind_table *table,
149 157 const char *name,
150 158 const void *core_start,
151 159  
152 160  
... ... @@ -152,14 +160,30 @@
152 160 const void *init_start,
153 161 unsigned long init_size,
154 162 const void *table_start,
155   - unsigned long table_size)
  163 + unsigned long table_size,
  164 + const u8 *header_start,
  165 + unsigned long header_size)
156 166 {
  167 + const u8 *ptr = header_start + 4;
  168 + const u8 *end = header_start + header_size;
  169 +
157 170 table->core.pc = (unsigned long)core_start;
158 171 table->core.range = core_size;
159 172 table->init.pc = (unsigned long)init_start;
160 173 table->init.range = init_size;
161 174 table->address = table_start;
162 175 table->size = table_size;
  176 + /* See if the linker provided table looks valid. */
  177 + if (header_size <= 4
  178 + || header_start[0] != 1
  179 + || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
  180 + || header_start[2] == DW_EH_PE_omit
  181 + || read_pointer(&ptr, end, header_start[2]) <= 0
  182 + || header_start[3] == DW_EH_PE_omit)
  183 + header_start = NULL;
  184 + table->hdrsz = header_size;
  185 + smp_wmb();
  186 + table->header = header_start;
163 187 table->link = NULL;
164 188 table->name = name;
165 189 }
166 190  
... ... @@ -169,9 +193,145 @@
169 193 init_unwind_table(&root_table, "kernel",
170 194 _text, _end - _text,
171 195 NULL, 0,
172   - __start_unwind, __end_unwind - __start_unwind);
  196 + __start_unwind, __end_unwind - __start_unwind,
  197 + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
173 198 }
174 199  
  200 +static const u32 bad_cie, not_fde;
  201 +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
  202 +static signed fde_pointer_type(const u32 *cie);
  203 +
  204 +struct eh_frame_hdr_table_entry {
  205 + unsigned long start, fde;
  206 +};
  207 +
  208 +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
  209 +{
  210 + const struct eh_frame_hdr_table_entry *e1 = p1;
  211 + const struct eh_frame_hdr_table_entry *e2 = p2;
  212 +
  213 + return (e1->start > e2->start) - (e1->start < e2->start);
  214 +}
  215 +
  216 +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
  217 +{
  218 + struct eh_frame_hdr_table_entry *e1 = p1;
  219 + struct eh_frame_hdr_table_entry *e2 = p2;
  220 + unsigned long v;
  221 +
  222 + v = e1->start;
  223 + e1->start = e2->start;
  224 + e2->start = v;
  225 + v = e1->fde;
  226 + e1->fde = e2->fde;
  227 + e2->fde = v;
  228 +}
  229 +
  230 +static void __init setup_unwind_table(struct unwind_table *table,
  231 + void *(*alloc)(unsigned long))
  232 +{
  233 + const u8 *ptr;
  234 + unsigned long tableSize = table->size, hdrSize;
  235 + unsigned n;
  236 + const u32 *fde;
  237 + struct {
  238 + u8 version;
  239 + u8 eh_frame_ptr_enc;
  240 + u8 fde_count_enc;
  241 + u8 table_enc;
  242 + unsigned long eh_frame_ptr;
  243 + unsigned int fde_count;
  244 + struct eh_frame_hdr_table_entry table[];
  245 + } __attribute__((__packed__)) *header;
  246 +
  247 + if (table->header)
  248 + return;
  249 +
  250 + if (table->hdrsz)
  251 + printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
  252 + table->name);
  253 +
  254 + if (tableSize & (sizeof(*fde) - 1))
  255 + return;
  256 +
  257 + for (fde = table->address, n = 0;
  258 + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
  259 + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
  260 + const u32 *cie = cie_for_fde(fde, table);
  261 + signed ptrType;
  262 +
  263 + if (cie == &not_fde)
  264 + continue;
  265 + if (cie == NULL
  266 + || cie == &bad_cie
  267 + || (ptrType = fde_pointer_type(cie)) < 0)
  268 + return;
  269 + ptr = (const u8 *)(fde + 2);
  270 + if (!read_pointer(&ptr,
  271 + (const u8 *)(fde + 1) + *fde,
  272 + ptrType))
  273 + return;
  274 + ++n;
  275 + }
  276 +
  277 + if (tableSize || !n)
  278 + return;
  279 +
  280 + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
  281 + + 2 * n * sizeof(unsigned long);
  282 + header = alloc(hdrSize);
  283 + if (!header)
  284 + return;
  285 + header->version = 1;
  286 + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
  287 + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4;
  288 + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native;
  289 + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
  290 + BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
  291 + % __alignof(typeof(header->fde_count)));
  292 + header->fde_count = n;
  293 +
  294 + BUILD_BUG_ON(offsetof(typeof(*header), table)
  295 + % __alignof(typeof(*header->table)));
  296 + for (fde = table->address, tableSize = table->size, n = 0;
  297 + tableSize;
  298 + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
  299 + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
  300 +
  301 + if (!fde[1])
  302 + continue; /* this is a CIE */
  303 + ptr = (const u8 *)(fde + 2);
  304 + header->table[n].start = read_pointer(&ptr,
  305 + (const u8 *)(fde + 1) + *fde,
  306 + fde_pointer_type(cie));
  307 + header->table[n].fde = (unsigned long)fde;
  308 + ++n;
  309 + }
  310 + WARN_ON(n != header->fde_count);
  311 +
  312 + sort(header->table,
  313 + n,
  314 + sizeof(*header->table),
  315 + cmp_eh_frame_hdr_table_entries,
  316 + swap_eh_frame_hdr_table_entries);
  317 +
  318 + table->hdrsz = hdrSize;
  319 + smp_wmb();
  320 + table->header = (const void *)header;
  321 +}
  322 +
  323 +static void *__init balloc(unsigned long sz)
  324 +{
  325 + return __alloc_bootmem_nopanic(sz,
  326 + sizeof(unsigned int),
  327 + __pa(MAX_DMA_ADDRESS));
  328 +}
  329 +
  330 +void __init unwind_setup(void)
  331 +{
  332 + setup_unwind_table(&root_table, balloc);
  333 +}
  334 +
175 335 #ifdef CONFIG_MODULES
176 336  
177 337 static struct unwind_table *last_table;
... ... @@ -193,7 +353,8 @@
193 353 init_unwind_table(table, module->name,
194 354 module->module_core, module->core_size,
195 355 module->module_init, module->init_size,
196   - table_start, table_size);
  356 + table_start, table_size,
  357 + NULL, 0);
197 358  
198 359 if (last_table)
199 360 last_table->link = table;
... ... @@ -303,6 +464,26 @@
303 464 return value;
304 465 }
305 466  
  467 +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
  468 +{
  469 + const u32 *cie;
  470 +
  471 + if (!*fde || (*fde & (sizeof(*fde) - 1)))
  472 + return &bad_cie;
  473 + if (!fde[1])
  474 + return &not_fde; /* this is a CIE */
  475 + if ((fde[1] & (sizeof(*fde) - 1))
  476 + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
  477 + return NULL; /* this is not a valid FDE */
  478 + cie = fde + 1 - fde[1] / sizeof(*fde);
  479 + if (*cie <= sizeof(*cie) + 4
  480 + || *cie >= fde[1] - sizeof(*fde)
  481 + || (*cie & (sizeof(*cie) - 1))
  482 + || cie[1])
  483 + return NULL; /* this is not a (valid) CIE */
  484 + return cie;
  485 +}
  486 +
306 487 static unsigned long read_pointer(const u8 **pLoc,
307 488 const void *end,
308 489 signed ptrType)
309 490  
310 491  
311 492  
312 493  
... ... @@ -610,49 +791,108 @@
610 791 unsigned i;
611 792 signed ptrType = -1;
612 793 uleb128_t retAddrReg = 0;
613   - struct unwind_table *table;
  794 + const struct unwind_table *table;
614 795 struct unwind_state state;
615 796  
616 797 if (UNW_PC(frame) == 0)
617 798 return -EINVAL;
618 799 if ((table = find_table(pc)) != NULL
619 800 && !(table->size & (sizeof(*fde) - 1))) {
620   - unsigned long tableSize = table->size;
  801 + const u8 *hdr = table->header;
  802 + unsigned long tableSize;
621 803  
622   - for (fde = table->address;
623   - tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
624   - tableSize -= sizeof(*fde) + *fde,
625   - fde += 1 + *fde / sizeof(*fde)) {
626   - if (!*fde || (*fde & (sizeof(*fde) - 1)))
627   - break;
628   - if (!fde[1])
629   - continue; /* this is a CIE */
630   - if ((fde[1] & (sizeof(*fde) - 1))
631   - || fde[1] > (unsigned long)(fde + 1)
632   - - (unsigned long)table->address)
633   - continue; /* this is not a valid FDE */
634   - cie = fde + 1 - fde[1] / sizeof(*fde);
635   - if (*cie <= sizeof(*cie) + 4
636   - || *cie >= fde[1] - sizeof(*fde)
637   - || (*cie & (sizeof(*cie) - 1))
638   - || cie[1]
639   - || (ptrType = fde_pointer_type(cie)) < 0) {
640   - cie = NULL; /* this is not a (valid) CIE */
641   - continue;
  804 + smp_rmb();
  805 + if (hdr && hdr[0] == 1) {
  806 + switch(hdr[3] & DW_EH_PE_FORM) {
  807 + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
  808 + case DW_EH_PE_data2: tableSize = 2; break;
  809 + case DW_EH_PE_data4: tableSize = 4; break;
  810 + case DW_EH_PE_data8: tableSize = 8; break;
  811 + default: tableSize = 0; break;
642 812 }
  813 + ptr = hdr + 4;
  814 + end = hdr + table->hdrsz;
  815 + if (tableSize
  816 + && read_pointer(&ptr, end, hdr[1])
  817 + == (unsigned long)table->address
  818 + && (i = read_pointer(&ptr, end, hdr[2])) > 0
  819 + && i == (end - ptr) / (2 * tableSize)
  820 + && !((end - ptr) % (2 * tableSize))) {
  821 + do {
  822 + const u8 *cur = ptr + (i / 2) * (2 * tableSize);
  823 +
  824 + startLoc = read_pointer(&cur,
  825 + cur + tableSize,
  826 + hdr[3]);
  827 + if (pc < startLoc)
  828 + i /= 2;
  829 + else {
  830 + ptr = cur - tableSize;
  831 + i = (i + 1) / 2;
  832 + }
  833 + } while (startLoc && i > 1);
  834 + if (i == 1
  835 + && (startLoc = read_pointer(&ptr,
  836 + ptr + tableSize,
  837 + hdr[3])) != 0
  838 + && pc >= startLoc)
  839 + fde = (void *)read_pointer(&ptr,
  840 + ptr + tableSize,
  841 + hdr[3]);
  842 + }
  843 + }
  844 +
  845 + if (fde != NULL) {
  846 + cie = cie_for_fde(fde, table);
643 847 ptr = (const u8 *)(fde + 2);
644   - startLoc = read_pointer(&ptr,
645   - (const u8 *)(fde + 1) + *fde,
646   - ptrType);
647   - endLoc = startLoc
648   - + read_pointer(&ptr,
649   - (const u8 *)(fde + 1) + *fde,
650   - ptrType & DW_EH_PE_indirect
651   - ? ptrType
652   - : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed));
653   - if (pc >= startLoc && pc < endLoc)
654   - break;
655   - cie = NULL;
  848 + if(cie != NULL
  849 + && cie != &bad_cie
  850 + && cie != &not_fde
  851 + && (ptrType = fde_pointer_type(cie)) >= 0
  852 + && read_pointer(&ptr,
  853 + (const u8 *)(fde + 1) + *fde,
  854 + ptrType) == startLoc) {
  855 + if (!(ptrType & DW_EH_PE_indirect))
  856 + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
  857 + endLoc = startLoc
  858 + + read_pointer(&ptr,
  859 + (const u8 *)(fde + 1) + *fde,
  860 + ptrType);
  861 + if(pc >= endLoc)
  862 + fde = NULL;
  863 + } else
  864 + fde = NULL;
  865 + }
  866 + if (fde == NULL) {
  867 + for (fde = table->address, tableSize = table->size;
  868 + cie = NULL, tableSize > sizeof(*fde)
  869 + && tableSize - sizeof(*fde) >= *fde;
  870 + tableSize -= sizeof(*fde) + *fde,
  871 + fde += 1 + *fde / sizeof(*fde)) {
  872 + cie = cie_for_fde(fde, table);
  873 + if (cie == &bad_cie) {
  874 + cie = NULL;
  875 + break;
  876 + }
  877 + if (cie == NULL
  878 + || cie == &not_fde
  879 + || (ptrType = fde_pointer_type(cie)) < 0)
  880 + continue;
  881 + ptr = (const u8 *)(fde + 2);
  882 + startLoc = read_pointer(&ptr,
  883 + (const u8 *)(fde + 1) + *fde,
  884 + ptrType);
  885 + if (!startLoc)
  886 + continue;
  887 + if (!(ptrType & DW_EH_PE_indirect))
  888 + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
  889 + endLoc = startLoc
  890 + + read_pointer(&ptr,
  891 + (const u8 *)(fde + 1) + *fde,
  892 + ptrType);
  893 + if (pc >= startLoc && pc < endLoc)
  894 + break;
  895 + }
656 896 }
657 897 }
658 898 if (cie != NULL) {