Commit 3fc1f1e27a5b807791d72e5d992aa33b668a6626

Authored by Tejun Heo
1 parent 1142d81029

stop_machine: reimplement using cpu_stop

Reimplement stop_machine using cpu_stop.  As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and removed.

With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler.  Asking the cpu_stop to execute
the stop_cpu() state machine on all online cpus with cpu hotplug
disabled is enough.

stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.

The previous implementation created and destroyed RT workqueues as
necessary which made stop_machine() calls highly expensive on very
large machines.  According to Dimitri Sivanich, avoiding the dynamic
creation/destruction makes booting more than twice as fast on very
large machines.  cpu_stop resources are preallocated for all online
cpus and should have the same effect.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>

Showing 6 changed files with 42 additions and 173 deletions Side-by-side Diff

arch/s390/kernel/time.c
... ... @@ -390,7 +390,6 @@
390 390 if (time_sync_wq)
391 391 return;
392 392 time_sync_wq = create_singlethread_workqueue("timesync");
393   - stop_machine_create();
394 393 }
395 394  
396 395 /*
drivers/xen/manage.c
... ... @@ -80,12 +80,6 @@
80 80  
81 81 shutting_down = SHUTDOWN_SUSPEND;
82 82  
83   - err = stop_machine_create();
84   - if (err) {
85   - printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
86   - goto out;
87   - }
88   -
89 83 #ifdef CONFIG_PREEMPT
90 84 /* If the kernel is preemptible, we need to freeze all the processes
91 85 to prevent them from being in the middle of a pagetable update
... ... @@ -93,7 +87,7 @@
93 87 err = freeze_processes();
94 88 if (err) {
95 89 printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
96   - goto out_destroy_sm;
  90 + goto out;
97 91 }
98 92 #endif
99 93  
100 94  
... ... @@ -136,12 +130,8 @@
136 130 out_thaw:
137 131 #ifdef CONFIG_PREEMPT
138 132 thaw_processes();
139   -
140   -out_destroy_sm:
141   -#endif
142   - stop_machine_destroy();
143   -
144 133 out:
  134 +#endif
145 135 shutting_down = SHUTDOWN_INVALID;
146 136 }
147 137 #endif /* CONFIG_PM_SLEEP */
include/linux/stop_machine.h
... ... @@ -67,23 +67,6 @@
67 67 */
68 68 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
69 69  
70   -/**
71   - * stop_machine_create: create all stop_machine threads
72   - *
73   - * Description: This causes all stop_machine threads to be created before
74   - * stop_machine actually gets called. This can be used by subsystems that
75   - * need a non failing stop_machine infrastructure.
76   - */
77   -int stop_machine_create(void);
78   -
79   -/**
80   - * stop_machine_destroy: destroy all stop_machine threads
81   - *
82   - * Description: This causes all stop_machine threads which were created with
83   - * stop_machine_create to be destroyed again.
84   - */
85   -void stop_machine_destroy(void);
86   -
87 70 #else
88 71  
89 72 static inline int stop_machine(int (*fn)(void *), void *data,
... ... @@ -95,9 +78,6 @@
95 78 local_irq_enable();
96 79 return ret;
97 80 }
98   -
99   -static inline int stop_machine_create(void) { return 0; }
100   -static inline void stop_machine_destroy(void) { }
101 81  
102 82 #endif /* CONFIG_SMP */
103 83 #endif /* _LINUX_STOP_MACHINE */
... ... @@ -266,9 +266,6 @@
266 266 {
267 267 int err;
268 268  
269   - err = stop_machine_create();
270   - if (err)
271   - return err;
272 269 cpu_maps_update_begin();
273 270  
274 271 if (cpu_hotplug_disabled) {
... ... @@ -280,7 +277,6 @@
280 277  
281 278 out:
282 279 cpu_maps_update_done();
283   - stop_machine_destroy();
284 280 return err;
285 281 }
286 282 EXPORT_SYMBOL(cpu_down);
... ... @@ -361,9 +357,6 @@
361 357 {
362 358 int cpu, first_cpu, error;
363 359  
364   - error = stop_machine_create();
365   - if (error)
366   - return error;
367 360 cpu_maps_update_begin();
368 361 first_cpu = cpumask_first(cpu_online_mask);
369 362 /*
... ... @@ -394,7 +387,6 @@
394 387 printk(KERN_ERR "Non-boot CPUs are not disabled\n");
395 388 }
396 389 cpu_maps_update_done();
397   - stop_machine_destroy();
398 390 return error;
399 391 }
400 392  
... ... @@ -723,17 +723,9 @@
723 723 return -EFAULT;
724 724 name[MODULE_NAME_LEN-1] = '\0';
725 725  
726   - /* Create stop_machine threads since free_module relies on
727   - * a non-failing stop_machine call. */
728   - ret = stop_machine_create();
729   - if (ret)
730   - return ret;
  726 + if (mutex_lock_interruptible(&module_mutex) != 0)
  727 + return -EINTR;
731 728  
732   - if (mutex_lock_interruptible(&module_mutex) != 0) {
733   - ret = -EINTR;
734   - goto out_stop;
735   - }
736   -
737 729 mod = find_module(name);
738 730 if (!mod) {
739 731 ret = -ENOENT;
... ... @@ -792,8 +784,6 @@
792 784  
793 785 out:
794 786 mutex_unlock(&module_mutex);
795   -out_stop:
796   - stop_machine_destroy();
797 787 return ret;
798 788 }
799 789  
kernel/stop_machine.c
... ... @@ -388,174 +388,92 @@
388 388 /* Exit */
389 389 STOPMACHINE_EXIT,
390 390 };
391   -static enum stopmachine_state state;
392 391  
393 392 struct stop_machine_data {
394   - int (*fn)(void *);
395   - void *data;
396   - int fnret;
  393 + int (*fn)(void *);
  394 + void *data;
  395 + /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
  396 + unsigned int num_threads;
  397 + const struct cpumask *active_cpus;
  398 +
  399 + enum stopmachine_state state;
  400 + atomic_t thread_ack;
397 401 };
398 402  
399   -/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
400   -static unsigned int num_threads;
401   -static atomic_t thread_ack;
402   -static DEFINE_MUTEX(lock);
403   -/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
404   -static DEFINE_MUTEX(setup_lock);
405   -/* Users of stop_machine. */
406   -static int refcount;
407   -static struct workqueue_struct *stop_machine_wq;
408   -static struct stop_machine_data active, idle;
409   -static const struct cpumask *active_cpus;
410   -static void __percpu *stop_machine_work;
411   -
412   -static void set_state(enum stopmachine_state newstate)
  403 +static void set_state(struct stop_machine_data *smdata,
  404 + enum stopmachine_state newstate)
413 405 {
414 406 /* Reset ack counter. */
415   - atomic_set(&thread_ack, num_threads);
  407 + atomic_set(&smdata->thread_ack, smdata->num_threads);
416 408 smp_wmb();
417   - state = newstate;
  409 + smdata->state = newstate;
418 410 }
419 411  
420 412 /* Last one to ack a state moves to the next state. */
421   -static void ack_state(void)
  413 +static void ack_state(struct stop_machine_data *smdata)
422 414 {
423   - if (atomic_dec_and_test(&thread_ack))
424   - set_state(state + 1);
  415 + if (atomic_dec_and_test(&smdata->thread_ack))
  416 + set_state(smdata, smdata->state + 1);
425 417 }
426 418  
427   -/* This is the actual function which stops the CPU. It runs
428   - * in the context of a dedicated stopmachine workqueue. */
429   -static void stop_cpu(struct work_struct *unused)
  419 +/* This is the cpu_stop function which stops the CPU. */
  420 +static int stop_machine_cpu_stop(void *data)
430 421 {
  422 + struct stop_machine_data *smdata = data;
431 423 enum stopmachine_state curstate = STOPMACHINE_NONE;
432   - struct stop_machine_data *smdata = &idle;
433   - int cpu = smp_processor_id();
434   - int err;
  424 + int cpu = smp_processor_id(), err = 0;
  425 + bool is_active;
435 426  
436   - if (!active_cpus) {
437   - if (cpu == cpumask_first(cpu_online_mask))
438   - smdata = &active;
439   - } else {
440   - if (cpumask_test_cpu(cpu, active_cpus))
441   - smdata = &active;
442   - }
  427 + if (!smdata->active_cpus)
  428 + is_active = cpu == cpumask_first(cpu_online_mask);
  429 + else
  430 + is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
  431 +
443 432 /* Simple state machine */
444 433 do {
445 434 /* Chill out and ensure we re-read stopmachine_state. */
446 435 cpu_relax();
447   - if (state != curstate) {
448   - curstate = state;
  436 + if (smdata->state != curstate) {
  437 + curstate = smdata->state;
449 438 switch (curstate) {
450 439 case STOPMACHINE_DISABLE_IRQ:
451 440 local_irq_disable();
452 441 hard_irq_disable();
453 442 break;
454 443 case STOPMACHINE_RUN:
455   - /* On multiple CPUs only a single error code
456   - * is needed to tell that something failed. */
457   - err = smdata->fn(smdata->data);
458   - if (err)
459   - smdata->fnret = err;
  444 + if (is_active)
  445 + err = smdata->fn(smdata->data);
460 446 break;
461 447 default:
462 448 break;
463 449 }
464   - ack_state();
  450 + ack_state(smdata);
465 451 }
466 452 } while (curstate != STOPMACHINE_EXIT);
467 453  
468 454 local_irq_enable();
  455 + return err;
469 456 }
470 457  
471   -/* Callback for CPUs which aren't supposed to do anything. */
472   -static int chill(void *unused)
473   -{
474   - return 0;
475   -}
476   -
477   -int stop_machine_create(void)
478   -{
479   - mutex_lock(&setup_lock);
480   - if (refcount)
481   - goto done;
482   - stop_machine_wq = create_rt_workqueue("kstop");
483   - if (!stop_machine_wq)
484   - goto err_out;
485   - stop_machine_work = alloc_percpu(struct work_struct);
486   - if (!stop_machine_work)
487   - goto err_out;
488   -done:
489   - refcount++;
490   - mutex_unlock(&setup_lock);
491   - return 0;
492   -
493   -err_out:
494   - if (stop_machine_wq)
495   - destroy_workqueue(stop_machine_wq);
496   - mutex_unlock(&setup_lock);
497   - return -ENOMEM;
498   -}
499   -EXPORT_SYMBOL_GPL(stop_machine_create);
500   -
501   -void stop_machine_destroy(void)
502   -{
503   - mutex_lock(&setup_lock);
504   - refcount--;
505   - if (refcount)
506   - goto done;
507   - destroy_workqueue(stop_machine_wq);
508   - free_percpu(stop_machine_work);
509   -done:
510   - mutex_unlock(&setup_lock);
511   -}
512   -EXPORT_SYMBOL_GPL(stop_machine_destroy);
513   -
514 458 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
515 459 {
516   - struct work_struct *sm_work;
517   - int i, ret;
  460 + struct stop_machine_data smdata = { .fn = fn, .data = data,
  461 + .num_threads = num_online_cpus(),
  462 + .active_cpus = cpus };
518 463  
519   - /* Set up initial state. */
520   - mutex_lock(&lock);
521   - num_threads = num_online_cpus();
522   - active_cpus = cpus;
523   - active.fn = fn;
524   - active.data = data;
525   - active.fnret = 0;
526   - idle.fn = chill;
527   - idle.data = NULL;
528   -
529   - set_state(STOPMACHINE_PREPARE);
530   -
531   - /* Schedule the stop_cpu work on all cpus: hold this CPU so one
532   - * doesn't hit this CPU until we're ready. */
533   - get_cpu();
534   - for_each_online_cpu(i) {
535   - sm_work = per_cpu_ptr(stop_machine_work, i);
536   - INIT_WORK(sm_work, stop_cpu);
537   - queue_work_on(i, stop_machine_wq, sm_work);
538   - }
539   - /* This will release the thread on our CPU. */
540   - put_cpu();
541   - flush_workqueue(stop_machine_wq);
542   - ret = active.fnret;
543   - mutex_unlock(&lock);
544   - return ret;
  464 + /* Set the initial state and stop all online cpus. */
  465 + set_state(&smdata, STOPMACHINE_PREPARE);
  466 + return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
545 467 }
546 468  
547 469 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
548 470 {
549 471 int ret;
550 472  
551   - ret = stop_machine_create();
552   - if (ret)
553   - return ret;
554 473 /* No CPUs can come up or down during this. */
555 474 get_online_cpus();
556 475 ret = __stop_machine(fn, data, cpus);
557 476 put_online_cpus();
558   - stop_machine_destroy();
559 477 return ret;
560 478 }
561 479 EXPORT_SYMBOL_GPL(stop_machine);