Commit 1fb9341ac34825aa40354e74d9a2c69df7d2c304

Authored by Rusty Russell
1 parent 0d21b0e347

module: put modules in list much earlier.

Prarit's excellent bug report:
> In recent Fedora releases (F17 & F18) some users have reported seeing
> messages similar to
>
> [   15.478160] kvm: Could not allocate 304 bytes percpu data
> [   15.478174] PERCPU: allocation failed, size=304 align=32, alloc from
> reserved chunk failed
>
> during system boot.  In some cases, users have also reported seeing this
> message along with a failed load of other modules.
>
> What is happening is systemd is loading an instance of the kvm module for
> each cpu found (see commit e9bda3b).  When the module load occurs the kernel
> currently allocates the modules percpu data area prior to checking to see
> if the module is already loaded or is in the process of being loaded.  If
> the module is already loaded, or finishes load, the module loading code
> releases the current instance's module's percpu data.

Now we have a new state MODULE_STATE_UNFORMED, we can insert the
module into the list (and thus guarantee its uniqueness) before we
allocate the per-cpu region.

Reported-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Tested-by: Prarit Bhargava <prarit@redhat.com>

Showing 2 changed files with 50 additions and 41 deletions Side-by-side Diff

... ... @@ -3017,7 +3017,7 @@
3017 3017 bool ret;
3018 3018  
3019 3019 mutex_lock(&module_mutex);
3020   - mod = find_module(name);
  3020 + mod = find_module_all(name, true);
3021 3021 ret = !mod || mod->state == MODULE_STATE_LIVE
3022 3022 || mod->state == MODULE_STATE_GOING;
3023 3023 mutex_unlock(&module_mutex);
... ... @@ -3141,6 +3141,32 @@
3141 3141 goto free_copy;
3142 3142 }
3143 3143  
  3144 + /*
  3145 + * We try to place it in the list now to make sure it's unique
  3146 + * before we dedicate too many resources. In particular,
  3147 + * temporary percpu memory exhaustion.
  3148 + */
  3149 + mod->state = MODULE_STATE_UNFORMED;
  3150 +again:
  3151 + mutex_lock(&module_mutex);
  3152 + if ((old = find_module_all(mod->name, true)) != NULL) {
  3153 + if (old->state == MODULE_STATE_COMING
  3154 + || old->state == MODULE_STATE_UNFORMED) {
  3155 + /* Wait in case it fails to load. */
  3156 + mutex_unlock(&module_mutex);
  3157 + err = wait_event_interruptible(module_wq,
  3158 + finished_loading(mod->name));
  3159 + if (err)
  3160 + goto free_module;
  3161 + goto again;
  3162 + }
  3163 + err = -EEXIST;
  3164 + mutex_unlock(&module_mutex);
  3165 + goto free_module;
  3166 + }
  3167 + list_add_rcu(&mod->list, &modules);
  3168 + mutex_unlock(&module_mutex);
  3169 +
3144 3170 #ifdef CONFIG_MODULE_SIG
3145 3171 mod->sig_ok = info->sig_ok;
3146 3172 if (!mod->sig_ok)
... ... @@ -3150,7 +3176,7 @@
3150 3176 /* Now module is in final location, initialize linked lists, etc. */
3151 3177 err = module_unload_init(mod);
3152 3178 if (err)
3153   - goto free_module;
  3179 + goto unlink_mod;
3154 3180  
3155 3181 /* Now we've got everything in the final locations, we can
3156 3182 * find optional sections. */
3157 3183  
3158 3184  
3159 3185  
3160 3186  
3161 3187  
3162 3188  
... ... @@ -3185,54 +3211,33 @@
3185 3211 goto free_arch_cleanup;
3186 3212 }
3187 3213  
3188   - /* Mark state as coming so strong_try_module_get() ignores us. */
3189   - mod->state = MODULE_STATE_COMING;
3190   -
3191   - /* Now sew it into the lists so we can get lockdep and oops
3192   - * info during argument parsing. No one should access us, since
3193   - * strong_try_module_get() will fail.
3194   - * lockdep/oops can run asynchronous, so use the RCU list insertion
3195   - * function to insert in a way safe to concurrent readers.
3196   - * The mutex protects against concurrent writers.
3197   - */
3198   -again:
3199   - mutex_lock(&module_mutex);
3200   - if ((old = find_module(mod->name)) != NULL) {
3201   - if (old->state == MODULE_STATE_COMING) {
3202   - /* Wait in case it fails to load. */
3203   - mutex_unlock(&module_mutex);
3204   - err = wait_event_interruptible(module_wq,
3205   - finished_loading(mod->name));
3206   - if (err)
3207   - goto free_arch_cleanup;
3208   - goto again;
3209   - }
3210   - err = -EEXIST;
3211   - goto unlock;
3212   - }
3213   -
3214   - /* This has to be done once we're sure module name is unique. */
3215 3214 dynamic_debug_setup(info->debug, info->num_debug);
3216 3215  
3217   - /* Find duplicate symbols */
  3216 + mutex_lock(&module_mutex);
  3217 + /* Find duplicate symbols (must be called under lock). */
3218 3218 err = verify_export_symbols(mod);
3219 3219 if (err < 0)
3220   - goto ddebug;
  3220 + goto ddebug_cleanup;
3221 3221  
  3222 + /* This relies on module_mutex for list integrity. */
3222 3223 module_bug_finalize(info->hdr, info->sechdrs, mod);
3223   - list_add_rcu(&mod->list, &modules);
  3224 +
  3225 + /* Mark state as coming so strong_try_module_get() ignores us,
  3226 + * but kallsyms etc. can see us. */
  3227 + mod->state = MODULE_STATE_COMING;
  3228 +
3224 3229 mutex_unlock(&module_mutex);
3225 3230  
3226 3231 /* Module is ready to execute: parsing args may do that. */
3227 3232 err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
3228 3233 -32768, 32767, &ddebug_dyndbg_module_param_cb);
3229 3234 if (err < 0)
3230   - goto unlink;
  3235 + goto bug_cleanup;
3231 3236  
3232 3237 /* Link in to syfs. */
3233 3238 err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
3234 3239 if (err < 0)
3235   - goto unlink;
  3240 + goto bug_cleanup;
3236 3241  
3237 3242 /* Get rid of temporary copy. */
3238 3243 free_copy(info);
3239 3244  
3240 3245  
3241 3246  
... ... @@ -3242,16 +3247,13 @@
3242 3247  
3243 3248 return do_init_module(mod);
3244 3249  
3245   - unlink:
  3250 + bug_cleanup:
  3251 + /* module_bug_cleanup needs module_mutex protection */
3246 3252 mutex_lock(&module_mutex);
3247   - /* Unlink carefully: kallsyms could be walking list. */
3248   - list_del_rcu(&mod->list);
3249 3253 module_bug_cleanup(mod);
3250   - wake_up_all(&module_wq);
3251   - ddebug:
3252   - dynamic_debug_remove(info->debug);
3253   - unlock:
3254 3254 mutex_unlock(&module_mutex);
  3255 + ddebug_cleanup:
  3256 + dynamic_debug_remove(info->debug);
3255 3257 synchronize_sched();
3256 3258 kfree(mod->args);
3257 3259 free_arch_cleanup:
... ... @@ -3260,6 +3262,12 @@
3260 3262 free_modinfo(mod);
3261 3263 free_unload:
3262 3264 module_unload_free(mod);
  3265 + unlink_mod:
  3266 + mutex_lock(&module_mutex);
  3267 + /* Unlink carefully: kallsyms could be walking list. */
  3268 + list_del_rcu(&mod->list);
  3269 + wake_up_all(&module_wq);
  3270 + mutex_unlock(&module_mutex);
3263 3271 free_module:
3264 3272 module_deallocate(mod, info);
3265 3273 free_copy:
... ... @@ -55,6 +55,7 @@
55 55 }
56 56  
57 57 #ifdef CONFIG_MODULES
  58 +/* Updates are protected by module mutex */
58 59 static LIST_HEAD(module_bug_list);
59 60  
60 61 static const struct bug_entry *module_find_bug(unsigned long bugaddr)