Commit 48fd11880b5ef04270be8a87d9a9a9ee2fdae338

Authored by Kevin Cernekee
Committed by Rusty Russell
1 parent 70b1e9161e

module: Fix performance regression on modules with large symbol tables

Looking at /proc/kallsyms, one starts to ponder whether all of the extra
strtab-related complexity in module.c is worth the memory savings.

Instead of making the add_kallsyms() loop even more complex, I tried the
other route of deleting the strmap logic and naively copying each string
into core_strtab with no consideration for consolidating duplicates.

Performance on an "already exists" insmod of nvidia.ko (runs
add_kallsyms() but does not actually initialize the module):

	Original scheme: 1.230s
	With naive copying: 0.058s

Extra space used: 35k (of a 408k module).

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <73defb5e4bca04a6431392cc341112b1@localhost>

Showing 1 changed file with 21 additions and 44 deletions Side-by-side Diff

... ... @@ -138,7 +138,6 @@
138 138 unsigned long len;
139 139 Elf_Shdr *sechdrs;
140 140 char *secstrings, *strtab;
141   - unsigned long *strmap;
142 141 unsigned long symoffs, stroffs;
143 142 struct _ddebug *debug;
144 143 unsigned int num_debug;
145 144  
... ... @@ -2178,12 +2177,19 @@
2178 2177 return true;
2179 2178 }
2180 2179  
  2180 +/*
  2181 + * We only allocate and copy the strings needed by the parts of symtab
  2182 + * we keep. This is simple, but has the effect of making multiple
  2183 + * copies of duplicates. We could be more sophisticated, see
  2184 + * linux-kernel thread starting with
  2185 + * <73defb5e4bca04a6431392cc341112b1@localhost>.
  2186 + */
2181 2187 static void layout_symtab(struct module *mod, struct load_info *info)
2182 2188 {
2183 2189 Elf_Shdr *symsect = info->sechdrs + info->index.sym;
2184 2190 Elf_Shdr *strsect = info->sechdrs + info->index.str;
2185 2191 const Elf_Sym *src;
2186   - unsigned int i, nsrc, ndst;
  2192 + unsigned int i, nsrc, ndst, strtab_size;
2187 2193  
2188 2194 /* Put symbol section at end of init part of module. */
2189 2195 symsect->sh_flags |= SHF_ALLOC;
2190 2196  
2191 2197  
2192 2198  
... ... @@ -2194,38 +2200,23 @@
2194 2200 src = (void *)info->hdr + symsect->sh_offset;
2195 2201 nsrc = symsect->sh_size / sizeof(*src);
2196 2202  
2197   - /*
2198   - * info->strmap has a '1' bit for each byte of .strtab we want to
2199   - * keep resident in mod->core_strtab. Everything else in .strtab
2200   - * is unreferenced by the symbols in mod->core_symtab, and will be
2201   - * discarded when add_kallsyms() compacts the string table.
2202   - */
2203   - for (ndst = i = 1; i < nsrc; ++i, ++src)
  2203 + /* Compute total space required for the core symbols' strtab. */
  2204 + for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
2204 2205 if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
2205   - unsigned int j = src->st_name;
2206   -
2207   - while (!__test_and_set_bit(j, info->strmap)
2208   - && info->strtab[j])
2209   - ++j;
2210   - ++ndst;
  2206 + strtab_size += strlen(&info->strtab[src->st_name]) + 1;
  2207 + ndst++;
2211 2208 }
2212 2209  
2213 2210 /* Append room for core symbols at end of core part. */
2214 2211 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
2215   - mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
  2212 + info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym);
  2213 + mod->core_size += strtab_size;
2216 2214  
2217 2215 /* Put string table section at end of init part of module. */
2218 2216 strsect->sh_flags |= SHF_ALLOC;
2219 2217 strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
2220 2218 info->index.str) | INIT_OFFSET_MASK;
2221 2219 DEBUGP("\t%s\n", info->secstrings + strsect->sh_name);
2222   -
2223   - /* Append room for core symbols' strings at end of core part. */
2224   - info->stroffs = mod->core_size;
2225   -
2226   - /* First strtab byte (and first symtab entry) are zeroes. */
2227   - __set_bit(0, info->strmap);
2228   - mod->core_size += bitmap_weight(info->strmap, strsect->sh_size);
2229 2220 }
2230 2221  
2231 2222 static void add_kallsyms(struct module *mod, const struct load_info *info)
2232 2223  
2233 2224  
2234 2225  
2235 2226  
... ... @@ -2246,22 +2237,19 @@
2246 2237 mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
2247 2238  
2248 2239 mod->core_symtab = dst = mod->module_core + info->symoffs;
  2240 + mod->core_strtab = s = mod->module_core + info->stroffs;
2249 2241 src = mod->symtab;
2250 2242 *dst = *src;
  2243 + *s++ = 0;
2251 2244 for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
2252 2245 if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
2253 2246 continue;
  2247 +
2254 2248 dst[ndst] = *src;
2255   - dst[ndst].st_name = bitmap_weight(info->strmap,
2256   - dst[ndst].st_name);
2257   - ++ndst;
  2249 + dst[ndst++].st_name = s - mod->core_strtab;
  2250 + s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
2258 2251 }
2259 2252 mod->core_num_syms = ndst;
2260   -
2261   - mod->core_strtab = s = mod->module_core + info->stroffs;
2262   - for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i)
2263   - if (test_bit(i, info->strmap))
2264   - *++s = mod->strtab[i];
2265 2253 }
2266 2254 #else
2267 2255 static inline void layout_symtab(struct module *mod, struct load_info *info)
2268 2256  
2269 2257  
... ... @@ -2751,27 +2739,18 @@
2751 2739 this is done generically; there doesn't appear to be any
2752 2740 special cases for the architectures. */
2753 2741 layout_sections(mod, info);
2754   -
2755   - info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size)
2756   - * sizeof(long), GFP_KERNEL);
2757   - if (!info->strmap) {
2758   - err = -ENOMEM;
2759   - goto free_percpu;
2760   - }
2761 2742 layout_symtab(mod, info);
2762 2743  
2763 2744 /* Allocate and move to the final place */
2764 2745 err = move_module(mod, info);
2765 2746 if (err)
2766   - goto free_strmap;
  2747 + goto free_percpu;
2767 2748  
2768 2749 /* Module has been copied to its final place now: return it. */
2769 2750 mod = (void *)info->sechdrs[info->index.mod].sh_addr;
2770 2751 kmemleak_load_module(mod, info);
2771 2752 return mod;
2772 2753  
2773   -free_strmap:
2774   - kfree(info->strmap);
2775 2754 free_percpu:
2776 2755 percpu_modfree(mod);
2777 2756 out:
... ... @@ -2781,7 +2760,6 @@
2781 2760 /* mod is no longer valid after this! */
2782 2761 static void module_deallocate(struct module *mod, struct load_info *info)
2783 2762 {
2784   - kfree(info->strmap);
2785 2763 percpu_modfree(mod);
2786 2764 module_free(mod, mod->module_init);
2787 2765 module_free(mod, mod->module_core);
... ... @@ -2911,8 +2889,7 @@
2911 2889 if (err < 0)
2912 2890 goto unlink;
2913 2891  
2914   - /* Get rid of temporary copy and strmap. */
2915   - kfree(info.strmap);
  2892 + /* Get rid of temporary copy. */
2916 2893 free_copy(&info);
2917 2894  
2918 2895 /* Done! */