Commit 3fc1f1e27a5b807791d72e5d992aa33b668a6626
1 parent
1142d81029
Exists in
master
and in
20 other branches
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop. As cpu stoppers are guaranteed to be available for all online cpus, stop_machine_create/destroy() are no longer necessary and removed. With resource management and synchronization handled by cpu_stop, the new implementation is much simpler. Asking the cpu_stop to execute the stop_cpu() state machine on all online cpus with cpu hotplug disabled is enough. stop_machine itself doesn't need to manage any global resources anymore, so all per-instance information is rolled into struct stop_machine_data and the mutex and all static data variables are removed. The previous implementation created and destroyed RT workqueues as necessary which made stop_machine() calls highly expensive on very large machines. According to Dimitri Sivanich, preventing the dynamic creation/destruction makes booting more than twice as fast on very large machines. cpu_stop resources are preallocated for all online cpus and should have the same effect. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Rusty Russell <rusty@rustcorp.com.au> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Dimitri Sivanich <sivanich@sgi.com>
Showing 6 changed files with 42 additions and 173 deletions Side-by-side Diff
arch/s390/kernel/time.c
drivers/xen/manage.c
... | ... | @@ -80,12 +80,6 @@ |
80 | 80 | |
81 | 81 | shutting_down = SHUTDOWN_SUSPEND; |
82 | 82 | |
83 | - err = stop_machine_create(); | |
84 | - if (err) { | |
85 | - printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err); | |
86 | - goto out; | |
87 | - } | |
88 | - | |
89 | 83 | #ifdef CONFIG_PREEMPT |
90 | 84 | /* If the kernel is preemptible, we need to freeze all the processes |
91 | 85 | to prevent them from being in the middle of a pagetable update |
... | ... | @@ -93,7 +87,7 @@ |
93 | 87 | err = freeze_processes(); |
94 | 88 | if (err) { |
95 | 89 | printk(KERN_ERR "xen suspend: freeze failed %d\n", err); |
96 | - goto out_destroy_sm; | |
90 | + goto out; | |
97 | 91 | } |
98 | 92 | #endif |
99 | 93 | |
100 | 94 | |
... | ... | @@ -136,12 +130,8 @@ |
136 | 130 | out_thaw: |
137 | 131 | #ifdef CONFIG_PREEMPT |
138 | 132 | thaw_processes(); |
139 | - | |
140 | -out_destroy_sm: | |
141 | -#endif | |
142 | - stop_machine_destroy(); | |
143 | - | |
144 | 133 | out: |
134 | +#endif | |
145 | 135 | shutting_down = SHUTDOWN_INVALID; |
146 | 136 | } |
147 | 137 | #endif /* CONFIG_PM_SLEEP */ |
include/linux/stop_machine.h
... | ... | @@ -67,23 +67,6 @@ |
67 | 67 | */ |
68 | 68 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); |
69 | 69 | |
70 | -/** | |
71 | - * stop_machine_create: create all stop_machine threads | |
72 | - * | |
73 | - * Description: This causes all stop_machine threads to be created before | |
74 | - * stop_machine actually gets called. This can be used by subsystems that | |
75 | - * need a non failing stop_machine infrastructure. | |
76 | - */ | |
77 | -int stop_machine_create(void); | |
78 | - | |
79 | -/** | |
80 | - * stop_machine_destroy: destroy all stop_machine threads | |
81 | - * | |
82 | - * Description: This causes all stop_machine threads which were created with | |
83 | - * stop_machine_create to be destroyed again. | |
84 | - */ | |
85 | -void stop_machine_destroy(void); | |
86 | - | |
87 | 70 | #else |
88 | 71 | |
89 | 72 | static inline int stop_machine(int (*fn)(void *), void *data, |
... | ... | @@ -95,9 +78,6 @@ |
95 | 78 | local_irq_enable(); |
96 | 79 | return ret; |
97 | 80 | } |
98 | - | |
99 | -static inline int stop_machine_create(void) { return 0; } | |
100 | -static inline void stop_machine_destroy(void) { } | |
101 | 81 | |
102 | 82 | #endif /* CONFIG_SMP */ |
103 | 83 | #endif /* _LINUX_STOP_MACHINE */ |
kernel/cpu.c
... | ... | @@ -266,9 +266,6 @@ |
266 | 266 | { |
267 | 267 | int err; |
268 | 268 | |
269 | - err = stop_machine_create(); | |
270 | - if (err) | |
271 | - return err; | |
272 | 269 | cpu_maps_update_begin(); |
273 | 270 | |
274 | 271 | if (cpu_hotplug_disabled) { |
... | ... | @@ -280,7 +277,6 @@ |
280 | 277 | |
281 | 278 | out: |
282 | 279 | cpu_maps_update_done(); |
283 | - stop_machine_destroy(); | |
284 | 280 | return err; |
285 | 281 | } |
286 | 282 | EXPORT_SYMBOL(cpu_down); |
... | ... | @@ -361,9 +357,6 @@ |
361 | 357 | { |
362 | 358 | int cpu, first_cpu, error; |
363 | 359 | |
364 | - error = stop_machine_create(); | |
365 | - if (error) | |
366 | - return error; | |
367 | 360 | cpu_maps_update_begin(); |
368 | 361 | first_cpu = cpumask_first(cpu_online_mask); |
369 | 362 | /* |
... | ... | @@ -394,7 +387,6 @@ |
394 | 387 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
395 | 388 | } |
396 | 389 | cpu_maps_update_done(); |
397 | - stop_machine_destroy(); | |
398 | 390 | return error; |
399 | 391 | } |
400 | 392 |
kernel/module.c
... | ... | @@ -723,17 +723,9 @@ |
723 | 723 | return -EFAULT; |
724 | 724 | name[MODULE_NAME_LEN-1] = '\0'; |
725 | 725 | |
726 | - /* Create stop_machine threads since free_module relies on | |
727 | - * a non-failing stop_machine call. */ | |
728 | - ret = stop_machine_create(); | |
729 | - if (ret) | |
730 | - return ret; | |
726 | + if (mutex_lock_interruptible(&module_mutex) != 0) | |
727 | + return -EINTR; | |
731 | 728 | |
732 | - if (mutex_lock_interruptible(&module_mutex) != 0) { | |
733 | - ret = -EINTR; | |
734 | - goto out_stop; | |
735 | - } | |
736 | - | |
737 | 729 | mod = find_module(name); |
738 | 730 | if (!mod) { |
739 | 731 | ret = -ENOENT; |
... | ... | @@ -792,8 +784,6 @@ |
792 | 784 | |
793 | 785 | out: |
794 | 786 | mutex_unlock(&module_mutex); |
795 | -out_stop: | |
796 | - stop_machine_destroy(); | |
797 | 787 | return ret; |
798 | 788 | } |
799 | 789 |
kernel/stop_machine.c
... | ... | @@ -388,174 +388,92 @@ |
388 | 388 | /* Exit */ |
389 | 389 | STOPMACHINE_EXIT, |
390 | 390 | }; |
391 | -static enum stopmachine_state state; | |
392 | 391 | |
393 | 392 | struct stop_machine_data { |
394 | - int (*fn)(void *); | |
395 | - void *data; | |
396 | - int fnret; | |
393 | + int (*fn)(void *); | |
394 | + void *data; | |
395 | + /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | |
396 | + unsigned int num_threads; | |
397 | + const struct cpumask *active_cpus; | |
398 | + | |
399 | + enum stopmachine_state state; | |
400 | + atomic_t thread_ack; | |
397 | 401 | }; |
398 | 402 | |
399 | -/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | |
400 | -static unsigned int num_threads; | |
401 | -static atomic_t thread_ack; | |
402 | -static DEFINE_MUTEX(lock); | |
403 | -/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ | |
404 | -static DEFINE_MUTEX(setup_lock); | |
405 | -/* Users of stop_machine. */ | |
406 | -static int refcount; | |
407 | -static struct workqueue_struct *stop_machine_wq; | |
408 | -static struct stop_machine_data active, idle; | |
409 | -static const struct cpumask *active_cpus; | |
410 | -static void __percpu *stop_machine_work; | |
411 | - | |
412 | -static void set_state(enum stopmachine_state newstate) | |
403 | +static void set_state(struct stop_machine_data *smdata, | |
404 | + enum stopmachine_state newstate) | |
413 | 405 | { |
414 | 406 | /* Reset ack counter. */ |
415 | - atomic_set(&thread_ack, num_threads); | |
407 | + atomic_set(&smdata->thread_ack, smdata->num_threads); | |
416 | 408 | smp_wmb(); |
417 | - state = newstate; | |
409 | + smdata->state = newstate; | |
418 | 410 | } |
419 | 411 | |
420 | 412 | /* Last one to ack a state moves to the next state. */ |
421 | -static void ack_state(void) | |
413 | +static void ack_state(struct stop_machine_data *smdata) | |
422 | 414 | { |
423 | - if (atomic_dec_and_test(&thread_ack)) | |
424 | - set_state(state + 1); | |
415 | + if (atomic_dec_and_test(&smdata->thread_ack)) | |
416 | + set_state(smdata, smdata->state + 1); | |
425 | 417 | } |
426 | 418 | |
427 | -/* This is the actual function which stops the CPU. It runs | |
428 | - * in the context of a dedicated stopmachine workqueue. */ | |
429 | -static void stop_cpu(struct work_struct *unused) | |
419 | +/* This is the cpu_stop function which stops the CPU. */ | |
420 | +static int stop_machine_cpu_stop(void *data) | |
430 | 421 | { |
422 | + struct stop_machine_data *smdata = data; | |
431 | 423 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
432 | - struct stop_machine_data *smdata = &idle; | |
433 | - int cpu = smp_processor_id(); | |
434 | - int err; | |
424 | + int cpu = smp_processor_id(), err = 0; | |
425 | + bool is_active; | |
435 | 426 | |
436 | - if (!active_cpus) { | |
437 | - if (cpu == cpumask_first(cpu_online_mask)) | |
438 | - smdata = &active; | |
439 | - } else { | |
440 | - if (cpumask_test_cpu(cpu, active_cpus)) | |
441 | - smdata = &active; | |
442 | - } | |
427 | + if (!smdata->active_cpus) | |
428 | + is_active = cpu == cpumask_first(cpu_online_mask); | |
429 | + else | |
430 | + is_active = cpumask_test_cpu(cpu, smdata->active_cpus); | |
431 | + | |
443 | 432 | /* Simple state machine */ |
444 | 433 | do { |
445 | 434 | /* Chill out and ensure we re-read stopmachine_state. */ |
446 | 435 | cpu_relax(); |
447 | - if (state != curstate) { | |
448 | - curstate = state; | |
436 | + if (smdata->state != curstate) { | |
437 | + curstate = smdata->state; | |
449 | 438 | switch (curstate) { |
450 | 439 | case STOPMACHINE_DISABLE_IRQ: |
451 | 440 | local_irq_disable(); |
452 | 441 | hard_irq_disable(); |
453 | 442 | break; |
454 | 443 | case STOPMACHINE_RUN: |
455 | - /* On multiple CPUs only a single error code | |
456 | - * is needed to tell that something failed. */ | |
457 | - err = smdata->fn(smdata->data); | |
458 | - if (err) | |
459 | - smdata->fnret = err; | |
444 | + if (is_active) | |
445 | + err = smdata->fn(smdata->data); | |
460 | 446 | break; |
461 | 447 | default: |
462 | 448 | break; |
463 | 449 | } |
464 | - ack_state(); | |
450 | + ack_state(smdata); | |
465 | 451 | } |
466 | 452 | } while (curstate != STOPMACHINE_EXIT); |
467 | 453 | |
468 | 454 | local_irq_enable(); |
455 | + return err; | |
469 | 456 | } |
470 | 457 | |
471 | -/* Callback for CPUs which aren't supposed to do anything. */ | |
472 | -static int chill(void *unused) | |
473 | -{ | |
474 | - return 0; | |
475 | -} | |
476 | - | |
477 | -int stop_machine_create(void) | |
478 | -{ | |
479 | - mutex_lock(&setup_lock); | |
480 | - if (refcount) | |
481 | - goto done; | |
482 | - stop_machine_wq = create_rt_workqueue("kstop"); | |
483 | - if (!stop_machine_wq) | |
484 | - goto err_out; | |
485 | - stop_machine_work = alloc_percpu(struct work_struct); | |
486 | - if (!stop_machine_work) | |
487 | - goto err_out; | |
488 | -done: | |
489 | - refcount++; | |
490 | - mutex_unlock(&setup_lock); | |
491 | - return 0; | |
492 | - | |
493 | -err_out: | |
494 | - if (stop_machine_wq) | |
495 | - destroy_workqueue(stop_machine_wq); | |
496 | - mutex_unlock(&setup_lock); | |
497 | - return -ENOMEM; | |
498 | -} | |
499 | -EXPORT_SYMBOL_GPL(stop_machine_create); | |
500 | - | |
501 | -void stop_machine_destroy(void) | |
502 | -{ | |
503 | - mutex_lock(&setup_lock); | |
504 | - refcount--; | |
505 | - if (refcount) | |
506 | - goto done; | |
507 | - destroy_workqueue(stop_machine_wq); | |
508 | - free_percpu(stop_machine_work); | |
509 | -done: | |
510 | - mutex_unlock(&setup_lock); | |
511 | -} | |
512 | -EXPORT_SYMBOL_GPL(stop_machine_destroy); | |
513 | - | |
514 | 458 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
515 | 459 | { |
516 | - struct work_struct *sm_work; | |
517 | - int i, ret; | |
460 | + struct stop_machine_data smdata = { .fn = fn, .data = data, | |
461 | + .num_threads = num_online_cpus(), | |
462 | + .active_cpus = cpus }; | |
518 | 463 | |
519 | - /* Set up initial state. */ | |
520 | - mutex_lock(&lock); | |
521 | - num_threads = num_online_cpus(); | |
522 | - active_cpus = cpus; | |
523 | - active.fn = fn; | |
524 | - active.data = data; | |
525 | - active.fnret = 0; | |
526 | - idle.fn = chill; | |
527 | - idle.data = NULL; | |
528 | - | |
529 | - set_state(STOPMACHINE_PREPARE); | |
530 | - | |
531 | - /* Schedule the stop_cpu work on all cpus: hold this CPU so one | |
532 | - * doesn't hit this CPU until we're ready. */ | |
533 | - get_cpu(); | |
534 | - for_each_online_cpu(i) { | |
535 | - sm_work = per_cpu_ptr(stop_machine_work, i); | |
536 | - INIT_WORK(sm_work, stop_cpu); | |
537 | - queue_work_on(i, stop_machine_wq, sm_work); | |
538 | - } | |
539 | - /* This will release the thread on our CPU. */ | |
540 | - put_cpu(); | |
541 | - flush_workqueue(stop_machine_wq); | |
542 | - ret = active.fnret; | |
543 | - mutex_unlock(&lock); | |
544 | - return ret; | |
464 | + /* Set the initial state and stop all online cpus. */ | |
465 | + set_state(&smdata, STOPMACHINE_PREPARE); | |
466 | + return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); | |
545 | 467 | } |
546 | 468 | |
547 | 469 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
548 | 470 | { |
549 | 471 | int ret; |
550 | 472 | |
551 | - ret = stop_machine_create(); | |
552 | - if (ret) | |
553 | - return ret; | |
554 | 473 | /* No CPUs can come up or down during this. */ |
555 | 474 | get_online_cpus(); |
556 | 475 | ret = __stop_machine(fn, data, cpus); |
557 | 476 | put_online_cpus(); |
558 | - stop_machine_destroy(); | |
559 | 477 | return ret; |
560 | 478 | } |
561 | 479 | EXPORT_SYMBOL_GPL(stop_machine); |