Commit 48fd11880b5ef04270be8a87d9a9a9ee2fdae338
Committed by
Rusty Russell
1 parent
70b1e9161e
Exists in
master
and in
20 other branches
module: Fix performance regression on modules with large symbol tables
Looking at /proc/kallsyms, one starts to ponder whether all of the extra strtab-related complexity in module.c is worth the memory savings. Instead of making the add_kallsyms() loop even more complex, I tried the other route of deleting the strmap logic and naively copying each string into core_strtab with no consideration for consolidating duplicates. Performance on an "already exists" insmod of nvidia.ko (runs add_kallsyms() but does not actually initialize the module): Original scheme: 1.230s With naive copying: 0.058s Extra space used: 35k (of a 408k module). Signed-off-by: Kevin Cernekee <cernekee@gmail.com> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> LKML-Reference: <73defb5e4bca04a6431392cc341112b1@localhost>
Showing 1 changed file with 21 additions and 44 deletions Side-by-side Diff
kernel/module.c
... | ... | @@ -138,7 +138,6 @@ |
138 | 138 | unsigned long len; |
139 | 139 | Elf_Shdr *sechdrs; |
140 | 140 | char *secstrings, *strtab; |
141 | - unsigned long *strmap; | |
142 | 141 | unsigned long symoffs, stroffs; |
143 | 142 | struct _ddebug *debug; |
144 | 143 | unsigned int num_debug; |
145 | 144 | |
... | ... | @@ -2178,12 +2177,19 @@ |
2178 | 2177 | return true; |
2179 | 2178 | } |
2180 | 2179 | |
2180 | +/* | |
2181 | + * We only allocate and copy the strings needed by the parts of symtab | |
2182 | + * we keep. This is simple, but has the effect of making multiple | |
2183 | + * copies of duplicates. We could be more sophisticated, see | |
2184 | + * linux-kernel thread starting with | |
2185 | + * <73defb5e4bca04a6431392cc341112b1@localhost>. | |
2186 | + */ | |
2181 | 2187 | static void layout_symtab(struct module *mod, struct load_info *info) |
2182 | 2188 | { |
2183 | 2189 | Elf_Shdr *symsect = info->sechdrs + info->index.sym; |
2184 | 2190 | Elf_Shdr *strsect = info->sechdrs + info->index.str; |
2185 | 2191 | const Elf_Sym *src; |
2186 | - unsigned int i, nsrc, ndst; | |
2192 | + unsigned int i, nsrc, ndst, strtab_size; | |
2187 | 2193 | |
2188 | 2194 | /* Put symbol section at end of init part of module. */ |
2189 | 2195 | symsect->sh_flags |= SHF_ALLOC; |
2190 | 2196 | |
2191 | 2197 | |
2192 | 2198 | |
... | ... | @@ -2194,38 +2200,23 @@ |
2194 | 2200 | src = (void *)info->hdr + symsect->sh_offset; |
2195 | 2201 | nsrc = symsect->sh_size / sizeof(*src); |
2196 | 2202 | |
2197 | - /* | |
2198 | - * info->strmap has a '1' bit for each byte of .strtab we want to | |
2199 | - * keep resident in mod->core_strtab. Everything else in .strtab | |
2200 | - * is unreferenced by the symbols in mod->core_symtab, and will be | |
2201 | - * discarded when add_kallsyms() compacts the string table. | |
2202 | - */ | |
2203 | - for (ndst = i = 1; i < nsrc; ++i, ++src) | |
2203 | + /* Compute total space required for the core symbols' strtab. */ | |
2204 | + for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) | |
2204 | 2205 | if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { |
2205 | - unsigned int j = src->st_name; | |
2206 | - | |
2207 | - while (!__test_and_set_bit(j, info->strmap) | |
2208 | - && info->strtab[j]) | |
2209 | - ++j; | |
2210 | - ++ndst; | |
2206 | + strtab_size += strlen(&info->strtab[src->st_name]) + 1; | |
2207 | + ndst++; | |
2211 | 2208 | } |
2212 | 2209 | |
2213 | 2210 | /* Append room for core symbols at end of core part. */ |
2214 | 2211 | info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); |
2215 | - mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); | |
2212 | + info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); | |
2213 | + mod->core_size += strtab_size; | |
2216 | 2214 | |
2217 | 2215 | /* Put string table section at end of init part of module. */ |
2218 | 2216 | strsect->sh_flags |= SHF_ALLOC; |
2219 | 2217 | strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, |
2220 | 2218 | info->index.str) | INIT_OFFSET_MASK; |
2221 | 2219 | DEBUGP("\t%s\n", info->secstrings + strsect->sh_name); |
2222 | - | |
2223 | - /* Append room for core symbols' strings at end of core part. */ | |
2224 | - info->stroffs = mod->core_size; | |
2225 | - | |
2226 | - /* First strtab byte (and first symtab entry) are zeroes. */ | |
2227 | - __set_bit(0, info->strmap); | |
2228 | - mod->core_size += bitmap_weight(info->strmap, strsect->sh_size); | |
2229 | 2220 | } |
2230 | 2221 | |
2231 | 2222 | static void add_kallsyms(struct module *mod, const struct load_info *info) |
2232 | 2223 | |
2233 | 2224 | |
2234 | 2225 | |
2235 | 2226 | |
... | ... | @@ -2246,22 +2237,19 @@ |
2246 | 2237 | mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); |
2247 | 2238 | |
2248 | 2239 | mod->core_symtab = dst = mod->module_core + info->symoffs; |
2240 | + mod->core_strtab = s = mod->module_core + info->stroffs; | |
2249 | 2241 | src = mod->symtab; |
2250 | 2242 | *dst = *src; |
2243 | + *s++ = 0; | |
2251 | 2244 | for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { |
2252 | 2245 | if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) |
2253 | 2246 | continue; |
2247 | + | |
2254 | 2248 | dst[ndst] = *src; |
2255 | - dst[ndst].st_name = bitmap_weight(info->strmap, | |
2256 | - dst[ndst].st_name); | |
2257 | - ++ndst; | |
2249 | + dst[ndst++].st_name = s - mod->core_strtab; | |
2250 | + s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; | |
2258 | 2251 | } |
2259 | 2252 | mod->core_num_syms = ndst; |
2260 | - | |
2261 | - mod->core_strtab = s = mod->module_core + info->stroffs; | |
2262 | - for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i) | |
2263 | - if (test_bit(i, info->strmap)) | |
2264 | - *++s = mod->strtab[i]; | |
2265 | 2253 | } |
2266 | 2254 | #else |
2267 | 2255 | static inline void layout_symtab(struct module *mod, struct load_info *info) |
2268 | 2256 | |
2269 | 2257 | |
... | ... | @@ -2751,27 +2739,18 @@ |
2751 | 2739 | this is done generically; there doesn't appear to be any |
2752 | 2740 | special cases for the architectures. */ |
2753 | 2741 | layout_sections(mod, info); |
2754 | - | |
2755 | - info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size) | |
2756 | - * sizeof(long), GFP_KERNEL); | |
2757 | - if (!info->strmap) { | |
2758 | - err = -ENOMEM; | |
2759 | - goto free_percpu; | |
2760 | - } | |
2761 | 2742 | layout_symtab(mod, info); |
2762 | 2743 | |
2763 | 2744 | /* Allocate and move to the final place */ |
2764 | 2745 | err = move_module(mod, info); |
2765 | 2746 | if (err) |
2766 | - goto free_strmap; | |
2747 | + goto free_percpu; | |
2767 | 2748 | |
2768 | 2749 | /* Module has been copied to its final place now: return it. */ |
2769 | 2750 | mod = (void *)info->sechdrs[info->index.mod].sh_addr; |
2770 | 2751 | kmemleak_load_module(mod, info); |
2771 | 2752 | return mod; |
2772 | 2753 | |
2773 | -free_strmap: | |
2774 | - kfree(info->strmap); | |
2775 | 2754 | free_percpu: |
2776 | 2755 | percpu_modfree(mod); |
2777 | 2756 | out: |
... | ... | @@ -2781,7 +2760,6 @@ |
2781 | 2760 | /* mod is no longer valid after this! */ |
2782 | 2761 | static void module_deallocate(struct module *mod, struct load_info *info) |
2783 | 2762 | { |
2784 | - kfree(info->strmap); | |
2785 | 2763 | percpu_modfree(mod); |
2786 | 2764 | module_free(mod, mod->module_init); |
2787 | 2765 | module_free(mod, mod->module_core); |
... | ... | @@ -2911,8 +2889,7 @@ |
2911 | 2889 | if (err < 0) |
2912 | 2890 | goto unlink; |
2913 | 2891 | |
2914 | - /* Get rid of temporary copy and strmap. */ | |
2915 | - kfree(info.strmap); | |
2892 | + /* Get rid of temporary copy. */ | |
2916 | 2893 | free_copy(&info); |
2917 | 2894 | |
2918 | 2895 | /* Done! */ |