Commit: eaf649e9fe6685f4c5a392cd0e16df5fd6660b7c
Committed by: Ingo Molnar
Parent: e260be673a
Exists in: master and 7 other branches
Preempt-RCU: CPU Hotplug handling
This patch allows preemptible RCU to tolerate CPU-hotplug operations. It accomplishes this by maintaining a local copy of a map of online CPUs, which it accesses under its own lock.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 1 changed file with 142 additions and 5 deletions
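Before the diff itself, a compressed restatement of the change: the patch adds a private cpumask that shadows cpu_online_map, updates it only while holding rcu_ctrlblk.fliplock (the lock that already serializes the grace-period "flip" state machine), and then drives every per-CPU loop of that state machine off the private copy. A minimal kernel-style sketch of the locking pattern follows; rcu_hotplug_update() is a hypothetical helper that folds together what rcu_online_cpu() and the tail of rcu_offline_cpu() do below, not code from the patch.

	static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;

	/*
	 * A CPU joins or leaves the map only under rcu_ctrlblk.fliplock,
	 * so the grace-period state machine never sees the map change in
	 * the middle of one of its per-CPU passes.
	 */
	static void rcu_hotplug_update(int cpu, int online)	/* hypothetical */
	{
		unsigned long flags;

		spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
		if (online)
			cpu_set(cpu, rcu_cpu_online_map);
		else
			cpu_clear(cpu, rcu_cpu_online_map);
		spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
	}

The diff keeps those two halves separate: rcu_online_cpu() only sets the bit, while rcu_offline_cpu() also migrates the dead CPU's callbacks and counters before clearing it.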
kernel/rcupreempt.c
... | ... | @@ -147,6 +147,8 @@ |
147 | 147 | { "idle", "waitack", "waitzero", "waitmb" }; |
148 | 148 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
149 | 149 | |
150 | +static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE; | |
151 | + | |
150 | 152 | /* |
151 | 153 | * Enum and per-CPU flag to determine when each CPU has seen |
152 | 154 | * the most recent counter flip. |
... | ... | @@ -445,7 +447,7 @@ |
445 | 447 | |
446 | 448 | /* Now ask each CPU for acknowledgement of the flip. */ |
447 | 449 | |
448 | - for_each_possible_cpu(cpu) | |
450 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
449 | 451 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; |
450 | 452 | |
451 | 453 | return 1; |
... | ... | @@ -461,7 +463,7 @@ |
461 | 463 | int cpu; |
462 | 464 | |
463 | 465 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); |
464 | - for_each_possible_cpu(cpu) | |
466 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
465 | 467 | if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { |
466 | 468 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); |
467 | 469 | return 0; |
... | ... | @@ -492,7 +494,7 @@ |
492 | 494 | /* Check to see if the sum of the "last" counters is zero. */ |
493 | 495 | |
494 | 496 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); |
495 | - for_each_possible_cpu(cpu) | |
497 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
496 | 498 | sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; |
497 | 499 | if (sum != 0) { |
498 | 500 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); |
... | ... | @@ -507,7 +509,7 @@ |
507 | 509 | smp_mb(); /* ^^^^^^^^^^^^ */ |
508 | 510 | |
509 | 511 | /* Call for a memory barrier from each CPU. */ |
510 | - for_each_possible_cpu(cpu) | |
512 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
511 | 513 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; |
512 | 514 | |
513 | 515 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); |
... | ... | @@ -525,7 +527,7 @@ |
525 | 527 | int cpu; |
526 | 528 | |
527 | 529 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); |
528 | - for_each_possible_cpu(cpu) | |
530 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
529 | 531 | if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { |
530 | 532 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); |
531 | 533 | return 0; |
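The five hunks above are one mechanical substitution, applied to each per-CPU pass of the grace-period state machine: requesting and checking flip acknowledgements, summing the "last" counters, and requesting and checking memory barriers all now walk the private map rather than every possible CPU. In sketch form, using the acknowledgement request from the first hunk (the comments are editorial, not from the patch):

	/* Old: every possible CPU is asked to acknowledge the counter flip,
	 * whether or not it is currently online and able to respond.
	 */
	for_each_possible_cpu(cpu)
		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;

	/* New: only CPUs in the private map are asked; a CPU that has gone
	 * down (and had its state folded into a survivor by rcu_offline_cpu(),
	 * below) no longer figures in grace-period calculations at all.
	 */
	for_each_cpu_mask(cpu, rcu_cpu_online_map)
		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;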
... | ... | @@ -637,6 +639,98 @@ |
637 | 639 | spin_unlock_irqrestore(&rdp->lock, flags); |
638 | 640 | } |
639 | 641 | |
642 | +#ifdef CONFIG_HOTPLUG_CPU | |
643 | +#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \ | |
644 | + *dsttail = srclist; \ | |
645 | + if (srclist != NULL) { \ | |
646 | + dsttail = srctail; \ | |
647 | + srclist = NULL; \ | |
648 | + srctail = &srclist;\ | |
649 | + } \ | |
650 | + } while (0) | |
651 | + | |
652 | +void rcu_offline_cpu(int cpu) | |
653 | +{ | |
654 | + int i; | |
655 | + struct rcu_head *list = NULL; | |
656 | + unsigned long flags; | |
657 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
658 | + struct rcu_head **tail = &list; | |
659 | + | |
660 | + /* | |
661 | + * Remove all callbacks from the newly dead CPU, retaining order. | |
662 | + * Otherwise rcu_barrier() will fail | |
663 | + */ | |
664 | + | |
665 | + spin_lock_irqsave(&rdp->lock, flags); | |
666 | + rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); | |
667 | + for (i = GP_STAGES - 1; i >= 0; i--) | |
668 | + rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], | |
669 | + list, tail); | |
670 | + rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); | |
671 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
672 | + rdp->waitlistcount = 0; | |
673 | + | |
674 | + /* Disengage the newly dead CPU from the grace-period computation. */ | |
675 | + | |
676 | + spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | |
677 | + rcu_check_mb(cpu); | |
678 | + if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | |
679 | + smp_mb(); /* Subsequent counter accesses must see new value */ | |
680 | + per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | |
681 | + smp_mb(); /* Subsequent RCU read-side critical sections */ | |
682 | + /* seen -after- acknowledgement. */ | |
683 | + } | |
684 | + | |
685 | + RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0]; | |
686 | + RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1]; | |
687 | + | |
688 | + RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0; | |
689 | + RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0; | |
690 | + | |
691 | + cpu_clear(cpu, rcu_cpu_online_map); | |
692 | + | |
693 | + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | |
694 | + | |
695 | + /* | |
696 | + * Place the removed callbacks on the current CPU's queue. | |
697 | + * Make them all start a new grace period: simple approach, | |
698 | + * in theory could starve a given set of callbacks, but | |
699 | + * you would need to be doing some serious CPU hotplugging | |
700 | + * to make this happen. If this becomes a problem, adding | |
701 | + * a synchronize_rcu() to the hotplug path would be a simple | |
702 | + * fix. | |
703 | + */ | |
704 | + | |
705 | + rdp = RCU_DATA_ME(); | |
706 | + spin_lock_irqsave(&rdp->lock, flags); | |
707 | + *rdp->nexttail = list; | |
708 | + if (list) | |
709 | + rdp->nexttail = tail; | |
710 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
711 | +} | |
712 | + | |
713 | +void __devinit rcu_online_cpu(int cpu) | |
714 | +{ | |
715 | + unsigned long flags; | |
716 | + | |
717 | + spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | |
718 | + cpu_set(cpu, rcu_cpu_online_map); | |
719 | + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | |
720 | +} | |
721 | + | |
722 | +#else /* #ifdef CONFIG_HOTPLUG_CPU */ | |
723 | + | |
724 | +void rcu_offline_cpu(int cpu) | |
725 | +{ | |
726 | +} | |
727 | + | |
728 | +void __devinit rcu_online_cpu(int cpu) | |
729 | +{ | |
730 | +} | |
731 | + | |
732 | +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | |
733 | + | |
640 | 734 | static void rcu_process_callbacks(struct softirq_action *unused) |
641 | 735 | { |
642 | 736 | unsigned long flags; |
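The trickiest piece in the hunk above is rcu_offline_cpu_enqueue(): it splices an entire callback list onto the destination's tail in constant time while preserving order (which, as the code comment notes, rcu_barrier() relies on), and it leaves the source list empty but still well formed. Below is a small stand-alone user-space illustration of the same tail-pointer splice; struct node, splice() and the sample values are invented for the example, and the kernel macro operates directly on the caller's variables where this function takes their addresses.

	#include <stdio.h>

	struct node { int val; struct node *next; };

	/* Move the whole source list onto the destination tail, as
	 * rcu_offline_cpu_enqueue() does for each callback sublist.
	 */
	static void splice(struct node **srclist, struct node ***srctail,
			   struct node ***dsttail)
	{
		**dsttail = *srclist;		/* hook source head onto destination tail */
		if (*srclist != NULL) {
			*dsttail = *srctail;	/* destination tail is now the source tail */
			*srclist = NULL;	/* source list becomes empty ...           */
			*srctail = srclist;	/* ... with its tail back at its own head  */
		}
	}

	int main(void)
	{
		struct node b = { 2, NULL };
		struct node a = { 1, &b };
		struct node *src = &a, **srctail = &b.next;
		struct node *dst = NULL, **dsttail = &dst;
		struct node *p;

		splice(&src, &srctail, &dsttail);

		for (p = dst; p != NULL; p = p->next)
			printf("%d\n", p->val);	/* prints 1 then 2: order preserved */
		return 0;
	}

The invariant that makes this work is that a list's tail pointer always points at the pointer slot holding the terminating NULL, so both appending and emptying are plain pointer assignments rather than list walks. rcu_offline_cpu() applies the splice to donelist, then the waitlist stages from oldest to newest, then nextlist, so the combined list keeps oldest-first order before being requeued on the current CPU.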
... | ... | @@ -746,6 +840,32 @@ |
746 | 840 | return 0; |
747 | 841 | } |
748 | 842 | |
843 | +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |
844 | + unsigned long action, void *hcpu) | |
845 | +{ | |
846 | + long cpu = (long)hcpu; | |
847 | + | |
848 | + switch (action) { | |
849 | + case CPU_UP_PREPARE: | |
850 | + case CPU_UP_PREPARE_FROZEN: | |
851 | + rcu_online_cpu(cpu); | |
852 | + break; | |
853 | + case CPU_UP_CANCELED: | |
854 | + case CPU_UP_CANCELED_FROZEN: | |
855 | + case CPU_DEAD: | |
856 | + case CPU_DEAD_FROZEN: | |
857 | + rcu_offline_cpu(cpu); | |
858 | + break; | |
859 | + default: | |
860 | + break; | |
861 | + } | |
862 | + return NOTIFY_OK; | |
863 | +} | |
864 | + | |
865 | +static struct notifier_block __cpuinitdata rcu_nb = { | |
866 | + .notifier_call = rcu_cpu_notify, | |
867 | +}; | |
868 | + | |
749 | 869 | void __init __rcu_init(void) |
750 | 870 | { |
751 | 871 | int cpu; |
... | ... | @@ -769,6 +889,23 @@ |
769 | 889 | rdp->rcu_flipctr[0] = 0; |
770 | 890 | rdp->rcu_flipctr[1] = 0; |
771 | 891 | } |
892 | + register_cpu_notifier(&rcu_nb); | |
893 | + | |
894 | + /* | |
895 | + * We don't need protection against CPU-Hotplug here | |
896 | + * since | |
897 | + * a) If a CPU comes online while we are iterating over the | |
898 | + * cpu_online_map below, we would only end up making a | |
899 | + * duplicate call to rcu_online_cpu() which sets the corresponding | |
900 | + * CPU's mask in the rcu_cpu_online_map. | |
901 | + * | |
902 | + * b) A CPU cannot go offline at this point in time since the user | |
903 | + * does not have access to the sysfs interface, nor do we | |
904 | + * suspend the system. | |
905 | + */ | |
906 | + for_each_online_cpu(cpu) | |
907 | + rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu); | |
908 | + | |
772 | 909 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); |
773 | 910 | } |
774 | 911 |
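A final note on the counter handling in rcu_offline_cpu(): it adds the dead CPU's two rcu_flipctr counters into the current CPU's and zeroes the dead CPU's copies. This is safe because the grace-period logic only examines the sum of rcu_flipctr[lastidx] across CPUs (the waitzero hunk above), and once the dead CPU is cleared from rcu_cpu_online_map it drops out of that sum, so its counts must live on in a CPU that remains in the map. A toy user-space check of the invariant follows; the array contents and CPU numbers are made-up values, not kernel data.

	#include <assert.h>
	#include <stdio.h>

	#define NCPUS 4

	int main(void)
	{
		/* flipctr[cpu][idx]: arbitrary sample counter values */
		int flipctr[NCPUS][2] = { {2, 0}, {1, 3}, {0, 1}, {4, 0} };
		int dead = 2, me = 0;	/* CPU 2 goes offline, CPU 0 absorbs it */
		int before[2] = {0, 0}, after[2] = {0, 0};
		int cpu, idx;

		for (idx = 0; idx < 2; idx++)
			for (cpu = 0; cpu < NCPUS; cpu++)
				before[idx] += flipctr[cpu][idx];

		/* the transfer performed by rcu_offline_cpu() */
		for (idx = 0; idx < 2; idx++) {
			flipctr[me][idx] += flipctr[dead][idx];
			flipctr[dead][idx] = 0;
		}

		for (idx = 0; idx < 2; idx++)
			for (cpu = 0; cpu < NCPUS; cpu++)
				after[idx] += flipctr[cpu][idx];

		/* per-index sums are unchanged, so grace-period accounting is too */
		assert(before[0] == after[0] && before[1] == after[1]);
		printf("sums: %d %d\n", after[0], after[1]);
		return 0;
	}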