Blame view
scripts/kallsyms.c
13 KB
1da177e4c
|
1 2 3 4 5 6 7 8 9 |
/* Generate assembler source containing symbol information * * Copyright 2002 by Kai Germaschewski * * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S * |
1da177e4c
|
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
* Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might * map char code 0xF7 to represent "write_" and then in every symbol where * "write_" appears it can be replaced by 0xF7, saving 5 bytes. * The used codes themselves are also placed in the table so that the * decompression can work without "special cases". * Applied to kernel symbols, this usually produces a compression ratio * of about 50%. * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> |
9281acea6
|
25 |
#define KSYM_NAME_LEN 128 |
1da177e4c
|
26 |
|
1da177e4c
|
27 28 |
/* One symbol read from the nm-style input map. */
struct sym_entry {
	/* address parsed from the hexadecimal first column */
	unsigned long long addr;
	/* number of bytes in sym[]: type char + name; shrinks as tokens
	 * are substituted by compress_symbols() */
	unsigned int len;
	/* index at which the symbol was read; used as the final sort key */
	unsigned int start_pos;
	/* malloc'ed buffer: sym[0] is the type char, the name follows */
	unsigned char *sym;
};
1da177e4c
|
33 |
/* growable array of symbols; enlarged by read_map() */
static struct sym_entry *table;
/* number of entries allocated in table[] */
static unsigned int table_size;
/* number of entries of table[] currently in use */
static unsigned int table_cnt;

/* addresses of the section marker symbols, recorded by read_symbol() */
static unsigned long long _text;
static unsigned long long _stext;
static unsigned long long _etext;
static unsigned long long _sinittext;
static unsigned long long _einittext;

/* non-zero once --all-symbols was given on the command line */
static int all_symbols = 0;
/* character given with --symbol-prefix=, or '\0' when there is none */
static char symbol_prefix_char = '\0';

/* occurrence count of every adjacent byte pair in the symbol table,
 * indexed by first byte + (second byte << 8) */
int token_profit[0x10000];

/* the table that holds the result of the compression */
unsigned char best_table[256][2];
/* number of valid bytes (0, 1 or 2) in each best_table[] slot */
unsigned char best_table_len[256];
b3dbb4ecd
|
44 |
/*
 * Print the command-line synopsis on stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void usage(void)
{
	/* terminate the message with a newline so the shell prompt does not
	 * end up glued to it */
	fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
	exit(1);
}

/*
 * This ignores the intensely annoying "mapping symbols" found
 * in ARM ELF files: $a, $t and $d.
 */
b3dbb4ecd
|
55 |
static inline int is_arm_mapping_symbol(const char *str)
{
	/*
	 * Returns 1 for "$a", "$t" or "$d", optionally followed by ".<suffix>",
	 * and 0 for anything else.
	 *
	 * strchr() also matches the terminating NUL of "atd", so a bare "$"
	 * has to be rejected before the lookup; otherwise str[2] would be
	 * examined although it lies beyond the end of the string.
	 */
	return str[0] == '$' && str[1] != '\0' && strchr("atd", str[1])
	       && (str[2] == '\0' || str[2] == '.');
}
b3dbb4ecd
|
60 |
static int read_symbol(FILE *in, struct sym_entry *s) |
1da177e4c
|
61 62 |
{ char str[500]; |
b3dbb4ecd
|
63 |
char *sym, stype; |
1da177e4c
|
64 |
int rc; |
b3dbb4ecd
|
65 66 |
rc = fscanf(in, "%llx %c %499s ", &s->addr, &stype, str); |
1da177e4c
|
67 68 69 70 71 72 73 |
if (rc != 3) { if (rc != EOF) { /* skip line */ fgets(str, 500, in); } return -1; } |
41f11a4fa
|
74 75 76 77 |
sym = str; /* skip prefix char */ if (symbol_prefix_char && str[0] == symbol_prefix_char) sym++; |
1da177e4c
|
78 |
/* Ignore most absolute/undefined (?) symbols. */ |
fd593d127
|
79 80 81 |
if (strcmp(sym, "_text") == 0) _text = s->addr; else if (strcmp(sym, "_stext") == 0) |
1da177e4c
|
82 |
_stext = s->addr; |
41f11a4fa
|
83 |
else if (strcmp(sym, "_etext") == 0) |
1da177e4c
|
84 |
_etext = s->addr; |
41f11a4fa
|
85 |
else if (strcmp(sym, "_sinittext") == 0) |
1da177e4c
|
86 |
_sinittext = s->addr; |
41f11a4fa
|
87 |
else if (strcmp(sym, "_einittext") == 0) |
1da177e4c
|
88 |
_einittext = s->addr; |
b3dbb4ecd
|
89 |
else if (toupper(stype) == 'A') |
1da177e4c
|
90 91 |
{ /* Keep these useful absolute symbols */ |
41f11a4fa
|
92 93 94 95 |
if (strcmp(sym, "__kernel_syscall_via_break") && strcmp(sym, "__kernel_syscall_via_epc") && strcmp(sym, "__kernel_sigtramp") && strcmp(sym, "__gp")) |
1da177e4c
|
96 97 98 |
return -1; } |
b3dbb4ecd
|
99 |
else if (toupper(stype) == 'U' || |
41f11a4fa
|
100 |
is_arm_mapping_symbol(sym)) |
1da177e4c
|
101 |
return -1; |
6f00df24e
|
102 103 104 |
/* exclude also MIPS ELF local symbols ($L123 instead of .L123) */ else if (str[0] == '$') return -1; |
aab34ac85
|
105 106 107 |
/* exclude debugging symbols */ else if (stype == 'N') return -1; |
1da177e4c
|
108 109 110 111 |
/* include the type field in the symbol name, so that it gets * compressed together */ s->len = strlen(str) + 1; |
b3dbb4ecd
|
112 |
s->sym = malloc(s->len + 1); |
f1a136e0d
|
113 114 115 116 117 118 |
if (!s->sym) { fprintf(stderr, "kallsyms failure: " "unable to allocate required amount of memory "); exit(EXIT_FAILURE); } |
b3dbb4ecd
|
119 120 |
strcpy((char *)s->sym + 1, str); s->sym[0] = stype; |
1da177e4c
|
121 122 123 |
return 0; } |
b3dbb4ecd
|
124 |
static int symbol_valid(struct sym_entry *s) |
1da177e4c
|
125 126 |
{ /* Symbols which vary between passes. Passes 1 and 2 must have |
9bb482476
|
127 |
* identical symbol lists. |
1da177e4c
|
128 129 |
*/ static char *special_symbols[] = { |
1da177e4c
|
130 131 132 133 134 |
/* Exclude linker generated symbols which vary between passes */ "_SDA_BASE_", /* ppc */ "_SDA2_BASE_", /* ppc */ NULL }; int i; |
41f11a4fa
|
135 136 137 138 139 |
int offset = 1; /* skip prefix char */ if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) offset++; |
1da177e4c
|
140 141 142 143 144 |
/* if --all-symbols is not specified, then symbols outside the text * and inittext sections are discarded */ if (!all_symbols) { if ((s->addr < _stext || s->addr > _etext) |
a3b81113f
|
145 |
&& (s->addr < _sinittext || s->addr > _einittext)) |
1da177e4c
|
146 147 |
return 0; /* Corner case. Discard any symbols with the same value as |
a3b81113f
|
148 149 150 151 |
* _etext _einittext; they can move between pass 1 and 2 when * the kallsyms data are added. If these symbols move then * they may get dropped in pass 2, which breaks the kallsyms * rules. |
1da177e4c
|
152 |
*/ |
a3b81113f
|
153 154 155 156 |
if ((s->addr == _etext && strcmp((char *)s->sym + offset, "_etext")) || (s->addr == _einittext && strcmp((char *)s->sym + offset, "_einittext"))) |
1da177e4c
|
157 158 159 160 |
return 0; } /* Exclude symbols which vary between passes. */ |
9bb482476
|
161 162 163 |
if (strstr((char *)s->sym + offset, "_compiled.") || strncmp((char*)s->sym + offset, "__compound_literal.", 19) == 0 || strncmp((char*)s->sym + offset, "__compound_literal$", 19) == 0) |
1da177e4c
|
164 165 166 |
return 0; for (i = 0; special_symbols[i]; i++) |
b3dbb4ecd
|
167 |
if( strcmp((char *)s->sym + offset, special_symbols[i]) == 0 ) |
1da177e4c
|
168 169 170 171 |
return 0; return 1; } |
b3dbb4ecd
|
172 |
static void read_map(FILE *in) |
1da177e4c
|
173 174 |
{ while (!feof(in)) { |
b3dbb4ecd
|
175 176 177 |
if (table_cnt >= table_size) { table_size += 10000; table = realloc(table, sizeof(*table) * table_size); |
1da177e4c
|
178 179 180 181 182 183 |
if (!table) { fprintf(stderr, "out of memory "); exit (1); } } |
f2df3f65d
|
184 185 |
if (read_symbol(in, &table[table_cnt]) == 0) { table[table_cnt].start_pos = table_cnt; |
b3dbb4ecd
|
186 |
table_cnt++; |
f2df3f65d
|
187 |
} |
1da177e4c
|
188 189 190 191 192 |
} } static void output_label(char *label) { |
41f11a4fa
|
193 194 195 196 197 198 |
if (symbol_prefix_char) printf(".globl %c%s ", symbol_prefix_char, label); else printf(".globl %s ", label); |
1da177e4c
|
199 200 |
printf("\tALGN "); |
41f11a4fa
|
201 202 203 204 205 206 |
if (symbol_prefix_char) printf("%c%s: ", symbol_prefix_char, label); else printf("%s: ", label); |
1da177e4c
|
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
} /* uncompress a compressed symbol. When this function is called, the best table * might still be compressed itself, so the function needs to be recursive */ static int expand_symbol(unsigned char *data, int len, char *result) { int c, rlen, total=0; while (len) { c = *data; /* if the table holds a single char that is the same as the one * we are looking for, then end the search */ if (best_table[c][0]==c && best_table_len[c]==1) { *result++ = c; total++; } else { /* if not, recurse and expand */ rlen = expand_symbol(best_table[c], best_table_len[c], result); total += rlen; result += rlen; } data++; len--; } *result=0; return total; } |
b3dbb4ecd
|
235 |
static void write_src(void) |
1da177e4c
|
236 |
{ |
b3dbb4ecd
|
237 |
unsigned int i, k, off; |
1da177e4c
|
238 239 |
unsigned int best_idx[256]; unsigned int *markers; |
9281acea6
|
240 |
char buf[KSYM_NAME_LEN]; |
1da177e4c
|
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 |
printf("#include <asm/types.h> "); printf("#if BITS_PER_LONG == 64 "); printf("#define PTR .quad "); printf("#define ALGN .align 8 "); printf("#else "); printf("#define PTR .long "); printf("#define ALGN .align 4 "); printf("#endif "); |
aad094701
|
258 259 |
printf("\t.section .rodata, \"a\" "); |
1da177e4c
|
260 |
|
fd593d127
|
261 262 263 264 265 266 267 |
/* Provide proper symbols relocatability by their '_text' * relativeness. The symbol names cannot be used to construct * normal symbol references as the list of symbols contains * symbols that are declared static and are private to their * .o files. This prevents .tmp_kallsyms.o or any other * object from referencing them. */ |
1da177e4c
|
268 |
output_label("kallsyms_addresses"); |
b3dbb4ecd
|
269 |
for (i = 0; i < table_cnt; i++) { |
fd593d127
|
270 |
if (toupper(table[i].sym[0]) != 'A') { |
2c22d8baa
|
271 272 273 274 275 276 277 278 |
if (_text <= table[i].addr) printf("\tPTR\t_text + %#llx ", table[i].addr - _text); else printf("\tPTR\t_text - %#llx ", _text - table[i].addr); |
fd593d127
|
279 280 281 282 |
} else { printf("\tPTR\t%#llx ", table[i].addr); } |
1da177e4c
|
283 284 285 286 287 |
} printf(" "); output_label("kallsyms_num_syms"); |
b3dbb4ecd
|
288 289 |
printf("\tPTR\t%d ", table_cnt); |
1da177e4c
|
290 291 292 293 294 |
printf(" "); /* table of offset markers, that give the offset in the compressed stream * every 256 symbols */ |
f1a136e0d
|
295 296 297 298 299 300 301 |
markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256)); if (!markers) { fprintf(stderr, "kallsyms failure: " "unable to allocate required memory "); exit(EXIT_FAILURE); } |
1da177e4c
|
302 303 |
output_label("kallsyms_names"); |
1da177e4c
|
304 |
off = 0; |
b3dbb4ecd
|
305 306 307 |
for (i = 0; i < table_cnt; i++) { if ((i & 0xFF) == 0) markers[i >> 8] = off; |
1da177e4c
|
308 309 310 311 312 313 314 315 |
printf("\t.byte 0x%02x", table[i].len); for (k = 0; k < table[i].len; k++) printf(", 0x%02x", table[i].sym[k]); printf(" "); off += table[i].len + 1; |
1da177e4c
|
316 317 318 319 320 |
} printf(" "); output_label("kallsyms_markers"); |
b3dbb4ecd
|
321 |
for (i = 0; i < ((table_cnt + 255) >> 8); i++) |
1da177e4c
|
322 323 324 325 326 327 328 329 330 331 332 |
printf("\tPTR\t%d ", markers[i]); printf(" "); free(markers); output_label("kallsyms_token_table"); off = 0; for (i = 0; i < 256; i++) { best_idx[i] = off; |
b3dbb4ecd
|
333 |
expand_symbol(best_table[i], best_table_len[i], buf); |
1da177e4c
|
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
printf("\t.asciz\t\"%s\" ", buf); off += strlen(buf) + 1; } printf(" "); output_label("kallsyms_token_index"); for (i = 0; i < 256; i++) printf("\t.short\t%d ", best_idx[i]); printf(" "); } /* table lookup compression functions */ |
1da177e4c
|
351 352 353 354 355 356 |
/* count all the possible tokens in a symbol */ static void learn_symbol(unsigned char *symbol, int len) { int i; for (i = 0; i < len - 1; i++) |
b3dbb4ecd
|
357 |
token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; |
1da177e4c
|
358 359 360 361 362 363 364 365 |
} /* decrease the count for all the possible tokens in a symbol */ static void forget_symbol(unsigned char *symbol, int len) { int i; for (i = 0; i < len - 1; i++) |
b3dbb4ecd
|
366 |
token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; |
1da177e4c
|
367 |
} |
b3dbb4ecd
|
368 |
/* remove all the invalid symbols from the table and do the initial token count */ |
1da177e4c
|
369 370 |
static void build_initial_tok_table(void) { |
b3dbb4ecd
|
371 |
unsigned int i, pos; |
1da177e4c
|
372 |
|
b3dbb4ecd
|
373 374 |
pos = 0; for (i = 0; i < table_cnt; i++) { |
1da177e4c
|
375 |
if ( symbol_valid(&table[i]) ) { |
b3dbb4ecd
|
376 377 378 379 |
if (pos != i) table[pos] = table[i]; learn_symbol(table[pos].sym, table[pos].len); pos++; |
1da177e4c
|
380 |
} |
1da177e4c
|
381 |
} |
b3dbb4ecd
|
382 |
table_cnt = pos; |
1da177e4c
|
383 |
} |
7c5d249ad
|
384 385 386 387 388 389 390 391 392 393 |
/*
 * Locate the first occurrence of the two-byte sequence @token within the
 * first @len bytes of @str.  Returns a pointer to the first byte of the
 * match, or NULL when the pair does not occur.
 */
static void *find_token(unsigned char *str, int len, unsigned char *token)
{
	int pos = 0;

	while (pos + 1 < len) {
		unsigned char *candidate = str + pos;

		if (candidate[0] == token[0] && candidate[1] == token[1])
			return candidate;
		pos++;
	}

	return NULL;
}
1da177e4c
|
394 395 |
/* replace a given token in all the valid symbols. Use the sampled symbols * to update the counts */ |
b3dbb4ecd
|
396 |
static void compress_symbols(unsigned char *str, int idx) |
1da177e4c
|
397 |
{ |
b3dbb4ecd
|
398 399 |
unsigned int i, len, size; unsigned char *p1, *p2; |
1da177e4c
|
400 |
|
b3dbb4ecd
|
401 |
for (i = 0; i < table_cnt; i++) { |
1da177e4c
|
402 403 |
len = table[i].len; |
b3dbb4ecd
|
404 405 406 |
p1 = table[i].sym; /* find the token on the symbol */ |
7c5d249ad
|
407 |
p2 = find_token(p1, len, str); |
b3dbb4ecd
|
408 409 410 411 412 413 |
if (!p2) continue; /* decrease the counts for this symbol's tokens */ forget_symbol(table[i].sym, len); size = len; |
1da177e4c
|
414 415 |
do { |
b3dbb4ecd
|
416 417 418 419 420 421 422 423 |
*p2 = idx; p2++; size -= (p2 - p1); memmove(p2, p2 + 1, size); p1 = p2; len--; if (size < 2) break; |
1da177e4c
|
424 |
/* find the token on the symbol */ |
7c5d249ad
|
425 |
p2 = find_token(p1, size, str); |
1da177e4c
|
426 |
|
b3dbb4ecd
|
427 |
} while (p2); |
1da177e4c
|
428 |
|
b3dbb4ecd
|
429 |
table[i].len = len; |
1da177e4c
|
430 |
|
b3dbb4ecd
|
431 432 |
/* increase the counts for this symbol's new tokens */ learn_symbol(table[i].sym, len); |
1da177e4c
|
433 434 435 436 |
} } /* search the token with the maximum profit */ |
b3dbb4ecd
|
437 |
static int find_best_token(void) |
1da177e4c
|
438 |
{ |
b3dbb4ecd
|
439 |
int i, best, bestprofit; |
1da177e4c
|
440 441 |
bestprofit=-10000; |
b3dbb4ecd
|
442 |
best = 0; |
1da177e4c
|
443 |
|
b3dbb4ecd
|
444 445 446 447 |
for (i = 0; i < 0x10000; i++) { if (token_profit[i] > bestprofit) { best = i; bestprofit = token_profit[i]; |
1da177e4c
|
448 |
} |
1da177e4c
|
449 |
} |
1da177e4c
|
450 451 452 453 454 455 |
return best; } /* this is the core of the algorithm: calculate the "best" table */ static void optimize_result(void) { |
b3dbb4ecd
|
456 |
int i, best; |
1da177e4c
|
457 458 459 460 461 462 463 464 465 466 467 468 469 |
/* using the '\0' symbol last allows compress_symbols to use standard * fast string functions */ for (i = 255; i >= 0; i--) { /* if this table slot is empty (it is not used by an actual * original char code */ if (!best_table_len[i]) { /* find the token with the breates profit value */ best = find_best_token(); /* place it in the "best" table */ |
b3dbb4ecd
|
470 471 472 |
best_table_len[i] = 2; best_table[i][0] = best & 0xFF; best_table[i][1] = (best >> 8) & 0xFF; |
1da177e4c
|
473 474 |
/* replace this token in all the valid symbols */ |
b3dbb4ecd
|
475 |
compress_symbols(best_table[i], i); |
1da177e4c
|
476 477 478 479 480 481 482 |
} } } /* start by placing the symbols that are actually used on the table */ static void insert_real_symbols_in_table(void) { |
b3dbb4ecd
|
483 |
unsigned int i, j, c; |
1da177e4c
|
484 485 486 |
memset(best_table, 0, sizeof(best_table)); memset(best_table_len, 0, sizeof(best_table_len)); |
b3dbb4ecd
|
487 488 489 490 491 |
for (i = 0; i < table_cnt; i++) { for (j = 0; j < table[i].len; j++) { c = table[i].sym[j]; best_table[c][0]=c; best_table_len[c]=1; |
1da177e4c
|
492 493 494 495 496 497 |
} } } static void optimize_token_table(void) { |
1da177e4c
|
498 499 500 |
build_initial_tok_table(); insert_real_symbols_in_table(); |
41f11a4fa
|
501 |
/* When valid symbol is not registered, exit to error */ |
b3dbb4ecd
|
502 |
if (!table_cnt) { |
41f11a4fa
|
503 504 505 506 |
fprintf(stderr, "No valid symbol. "); exit(1); } |
1da177e4c
|
507 508 |
optimize_result(); } |
f2df3f65d
|
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 |
static int compare_symbols(const void *a, const void *b) { const struct sym_entry *sa; const struct sym_entry *sb; int wa, wb; sa = a; sb = b; /* sort by address first */ if (sa->addr > sb->addr) return 1; if (sa->addr < sb->addr) return -1; /* sort by "weakness" type */ wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); if (wa != wb) return wa - wb; /* sort by initial order, so that other symbols are left undisturbed */ return sa->start_pos - sb->start_pos; } static void sort_symbols(void) { qsort(table, table_cnt, sizeof(struct sym_entry), compare_symbols); } |
1da177e4c
|
538 |
|
b3dbb4ecd
|
539 |
int main(int argc, char **argv) |
1da177e4c
|
540 |
{ |
41f11a4fa
|
541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 |
if (argc >= 2) { int i; for (i = 1; i < argc; i++) { if(strcmp(argv[i], "--all-symbols") == 0) all_symbols = 1; else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) { char *p = &argv[i][16]; /* skip quote */ if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) p++; symbol_prefix_char = *p; } else usage(); } } else if (argc != 1) |
1da177e4c
|
556 557 558 |
usage(); read_map(stdin); |
9bb482476
|
559 560 561 562 |
if (table_cnt) { sort_symbols(); optimize_token_table(); } |
1da177e4c
|
563 564 565 566 |
write_src(); return 0; } |