Commit: eaf649e9fe6685f4c5a392cd0e16df5fd6660b7c
Committed by: Ingo Molnar
Parent: e260be673a
Exists in: master and 7 other branches
Preempt-RCU: CPU Hotplug handling
This patch allows preemptible RCU to tolerate CPU-hotplug operations. It accomplishes this by maintaining a local copy of a map of online CPUs, which it accesses under its own lock.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Showing 1 changed file with 142 additions and 5 deletions
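Before the diff itself, a compressed restatement of the change: the patch adds a private cpumask that shadows cpu_online_map, updates it only while holding rcu_ctrlblk.fliplock (the lock that already serializes the grace-period "flip" state machine), and then drives every per-CPU loop of that state machine off the private copy. A minimal kernel-style sketch of the locking pattern follows; rcu_hotplug_update() is a hypothetical helper that folds together what rcu_online_cpu() and the tail of rcu_offline_cpu() do below, not code from the patch.

	static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;

	/*
	 * A CPU joins or leaves the map only under rcu_ctrlblk.fliplock,
	 * so the grace-period state machine never sees the map change in
	 * the middle of one of its per-CPU passes.
	 */
	static void rcu_hotplug_update(int cpu, int online)	/* hypothetical */
	{
		unsigned long flags;

		spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
		if (online)
			cpu_set(cpu, rcu_cpu_online_map);
		else
			cpu_clear(cpu, rcu_cpu_online_map);
		spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
	}

The diff keeps those two halves separate: rcu_online_cpu() only sets the bit, while rcu_offline_cpu() also migrates the dead CPU's callbacks and counters before clearing it.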
kernel/rcupreempt.c
... | ... | @@ -147,6 +147,8 @@ |
147 | 147 | { "idle", "waitack", "waitzero", "waitmb" }; |
148 | 148 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
149 | 149 | |
150 | +static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE; | |
151 | + | |
150 | 152 | /* |
151 | 153 | * Enum and per-CPU flag to determine when each CPU has seen |
152 | 154 | * the most recent counter flip. |
... | ... | @@ -445,7 +447,7 @@ |
445 | 447 | |
446 | 448 | /* Now ask each CPU for acknowledgement of the flip. */ |
447 | 449 | |
448 | - for_each_possible_cpu(cpu) | |
450 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
449 | 451 | per_cpu(rcu_flip_flag, cpu) = rcu_flipped; |
450 | 452 | |
451 | 453 | return 1; |
... | ... | @@ -461,7 +463,7 @@ |
461 | 463 | int cpu; |
462 | 464 | |
463 | 465 | RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); |
464 | - for_each_possible_cpu(cpu) | |
466 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
465 | 467 | if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { |
466 | 468 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); |
467 | 469 | return 0; |
... | ... | @@ -492,7 +494,7 @@ |
492 | 494 | /* Check to see if the sum of the "last" counters is zero. */ |
493 | 495 | |
494 | 496 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); |
495 | - for_each_possible_cpu(cpu) | |
497 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
496 | 498 | sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; |
497 | 499 | if (sum != 0) { |
498 | 500 | RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); |
... | ... | @@ -507,7 +509,7 @@ |
507 | 509 | smp_mb(); /* ^^^^^^^^^^^^ */ |
508 | 510 | |
509 | 511 | /* Call for a memory barrier from each CPU. */ |
510 | - for_each_possible_cpu(cpu) | |
512 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
511 | 513 | per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; |
512 | 514 | |
513 | 515 | RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); |
... | ... | @@ -525,7 +527,7 @@ |
525 | 527 | int cpu; |
526 | 528 | |
527 | 529 | RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); |
528 | - for_each_possible_cpu(cpu) | |
530 | + for_each_cpu_mask(cpu, rcu_cpu_online_map) | |
529 | 531 | if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { |
530 | 532 | RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); |
531 | 533 | return 0; |
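The five hunks above are one mechanical substitution, applied to each per-CPU pass of the grace-period state machine: requesting and checking flip acknowledgements, summing the "last" counters, and requesting and checking memory barriers all now walk the private map rather than every possible CPU. In sketch form, using the acknowledgement request from the first hunk (the comments are editorial, not from the patch):

	/* Old: every possible CPU is asked to acknowledge the counter flip,
	 * whether or not it is currently online and able to respond.
	 */
	for_each_possible_cpu(cpu)
		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;

	/* New: only CPUs in the private map are asked; a CPU that has gone
	 * down (and had its state folded into a survivor by rcu_offline_cpu(),
	 * below) no longer figures in grace-period calculations at all.
	 */
	for_each_cpu_mask(cpu, rcu_cpu_online_map)
		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;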
... | ... | @@ -637,6 +639,98 @@ |
637 | 639 | spin_unlock_irqrestore(&rdp->lock, flags); |
638 | 640 | } |
639 | 641 | |
642 | +#ifdef CONFIG_HOTPLUG_CPU | |
643 | +#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \ | |
644 | + *dsttail = srclist; \ | |
645 | + if (srclist != NULL) { \ | |
646 | + dsttail = srctail; \ | |
647 | + srclist = NULL; \ | |
648 | + srctail = &srclist;\ | |
649 | + } \ | |
650 | + } while (0) | |
651 | + | |
652 | +void rcu_offline_cpu(int cpu) | |
653 | +{ | |
654 | + int i; | |
655 | + struct rcu_head *list = NULL; | |
656 | + unsigned long flags; | |
657 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
658 | + struct rcu_head **tail = &list; | |
659 | + | |
660 | + /* | |
661 | + * Remove all callbacks from the newly dead CPU, retaining order. | |
662 | + * Otherwise rcu_barrier() will fail | |
663 | + */ | |
664 | + | |
665 | + spin_lock_irqsave(&rdp->lock, flags); | |
666 | + rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); | |
667 | + for (i = GP_STAGES - 1; i >= 0; i--) | |
668 | + rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], | |
669 | + list, tail); | |
670 | + rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); | |
671 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
672 | + rdp->waitlistcount = 0; | |
673 | + | |
674 | + /* Disengage the newly dead CPU from the grace-period computation. */ | |
675 | + | |
676 | + spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | |
677 | + rcu_check_mb(cpu); | |
678 | + if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | |
679 | + smp_mb(); /* Subsequent counter accesses must see new value */ | |
680 | + per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | |
681 | + smp_mb(); /* Subsequent RCU read-side critical sections */ | |
682 | + /* seen -after- acknowledgement. */ | |
683 | + } | |
684 | + | |
685 | + RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0]; | |
686 | + RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1]; | |
687 | + | |
688 | + RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0; | |
689 | + RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0; | |
690 | + | |
691 | + cpu_clear(cpu, rcu_cpu_online_map); | |
692 | + | |
693 | + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | |
694 | + | |
695 | + /* | |
696 | + * Place the removed callbacks on the current CPU's queue. | |
697 | + * Make them all start a new grace period: simple approach, | |
698 | + * in theory could starve a given set of callbacks, but | |
699 | + * you would need to be doing some serious CPU hotplugging | |
700 | + * to make this happen. If this becomes a problem, adding | |
701 | + * a synchronize_rcu() to the hotplug path would be a simple | |
702 | + * fix. | |
703 | + */ | |
704 | + | |
705 | + rdp = RCU_DATA_ME(); | |
706 | + spin_lock_irqsave(&rdp->lock, flags); | |
707 | + *rdp->nexttail = list; | |
708 | + if (list) | |
709 | + rdp->nexttail = tail; | |
710 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
711 | +} | |
712 | + | |
713 | +void __devinit rcu_online_cpu(int cpu) | |
714 | +{ | |
715 | + unsigned long flags; | |
716 | + | |
717 | + spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | |
718 | + cpu_set(cpu, rcu_cpu_online_map); | |
719 | + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | |
720 | +} | |
721 | + | |
722 | +#else /* #ifdef CONFIG_HOTPLUG_CPU */ | |
723 | + | |
724 | +void rcu_offline_cpu(int cpu) | |
725 | +{ | |
726 | +} | |
727 | + | |
728 | +void __devinit rcu_online_cpu(int cpu) | |
729 | +{ | |
730 | +} | |
731 | + | |
732 | +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | |
733 | + | |
640 | 734 | static void rcu_process_callbacks(struct softirq_action *unused) |
641 | 735 | { |
642 | 736 | unsigned long flags; |
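The trickiest piece in the hunk above is rcu_offline_cpu_enqueue(): it splices an entire callback list onto the destination's tail in constant time while preserving order (which, as the code comment notes, rcu_barrier() relies on), and it leaves the source list empty but still well formed. Below is a small stand-alone user-space illustration of the same tail-pointer splice; struct node, splice() and the sample values are invented for the example, and the kernel macro operates directly on the caller's variables where this function takes their addresses.

	#include <stdio.h>

	struct node { int val; struct node *next; };

	/* Move the whole source list onto the destination tail, as
	 * rcu_offline_cpu_enqueue() does for each callback sublist.
	 */
	static void splice(struct node **srclist, struct node ***srctail,
			   struct node ***dsttail)
	{
		**dsttail = *srclist;		/* hook source head onto destination tail */
		if (*srclist != NULL) {
			*dsttail = *srctail;	/* destination tail is now the source tail */
			*srclist = NULL;	/* source list becomes empty ...           */
			*srctail = srclist;	/* ... with its tail back at its own head  */
		}
	}

	int main(void)
	{
		struct node b = { 2, NULL };
		struct node a = { 1, &b };
		struct node *src = &a, **srctail = &b.next;
		struct node *dst = NULL, **dsttail = &dst;
		struct node *p;

		splice(&src, &srctail, &dsttail);

		for (p = dst; p != NULL; p = p->next)
			printf("%d\n", p->val);	/* prints 1 then 2: order preserved */
		return 0;
	}

The invariant that makes this work is that a list's tail pointer always points at the pointer slot holding the terminating NULL, so both appending and emptying are plain pointer assignments rather than list walks. rcu_offline_cpu() applies the splice to donelist, then the waitlist stages from oldest to newest, then nextlist, so the combined list keeps oldest-first order before being requeued on the current CPU.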
... | ... | @@ -746,6 +840,32 @@ |
746 | 840 | return 0; |
747 | 841 | } |
748 | 842 | |
843 | +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |
844 | + unsigned long action, void *hcpu) | |
845 | +{ | |
846 | + long cpu = (long)hcpu; | |
847 | + | |
848 | + switch (action) { | |
849 | + case CPU_UP_PREPARE: | |
850 | + case CPU_UP_PREPARE_FROZEN: | |
851 | + rcu_online_cpu(cpu); | |
852 | + break; | |
853 | + case CPU_UP_CANCELED: | |
854 | + case CPU_UP_CANCELED_FROZEN: | |
855 | + case CPU_DEAD: | |
856 | + case CPU_DEAD_FROZEN: | |
857 | + rcu_offline_cpu(cpu); | |
858 | + break; | |
859 | + default: | |
860 | + break; | |
861 | + } | |
862 | + return NOTIFY_OK; | |
863 | +} | |
864 | + | |
865 | +static struct notifier_block __cpuinitdata rcu_nb = { | |
866 | + .notifier_call = rcu_cpu_notify, | |
867 | +}; | |
868 | + | |
749 | 869 | void __init __rcu_init(void) |
750 | 870 | { |
751 | 871 | int cpu; |
... | ... | @@ -769,6 +889,23 @@ |
769 | 889 | rdp->rcu_flipctr[0] = 0; |
770 | 890 | rdp->rcu_flipctr[1] = 0; |
771 | 891 | } |
892 | + register_cpu_notifier(&rcu_nb); | |
893 | + | |
894 | + /* | |
895 | + * We don't need protection against CPU-Hotplug here | |
896 | + * since | |
897 | + * a) If a CPU comes online while we are iterating over the | |
898 | + * cpu_online_map below, we would only end up making a | |
899 | + * duplicate call to rcu_online_cpu() which sets the corresponding | |
900 | + * CPU's mask in the rcu_cpu_online_map. | |
901 | + * | |
902 | + * b) A CPU cannot go offline at this point in time since the user | |
903 | + * does not have access to the sysfs interface, nor do we | |
904 | + * suspend the system. | |
905 | + */ | |
906 | + for_each_online_cpu(cpu) | |
907 | + rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu); | |
908 | + | |
772 | 909 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); |
773 | 910 | } |
774 | 911 |
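A final note on the counter handling in rcu_offline_cpu(): it adds the dead CPU's two rcu_flipctr counters into the current CPU's and zeroes the dead CPU's copies. This is safe because the grace-period logic only examines the sum of rcu_flipctr[lastidx] across CPUs (the waitzero hunk above), and once the dead CPU is cleared from rcu_cpu_online_map it drops out of that sum, so its counts must live on in a CPU that remains in the map. A toy user-space check of the invariant follows; the array contents and CPU numbers are made-up values, not kernel data.

	#include <assert.h>
	#include <stdio.h>

	#define NCPUS 4

	int main(void)
	{
		/* flipctr[cpu][idx]: arbitrary sample counter values */
		int flipctr[NCPUS][2] = { {2, 0}, {1, 3}, {0, 1}, {4, 0} };
		int dead = 2, me = 0;	/* CPU 2 goes offline, CPU 0 absorbs it */
		int before[2] = {0, 0}, after[2] = {0, 0};
		int cpu, idx;

		for (idx = 0; idx < 2; idx++)
			for (cpu = 0; cpu < NCPUS; cpu++)
				before[idx] += flipctr[cpu][idx];

		/* the transfer performed by rcu_offline_cpu() */
		for (idx = 0; idx < 2; idx++) {
			flipctr[me][idx] += flipctr[dead][idx];
			flipctr[dead][idx] = 0;
		}

		for (idx = 0; idx < 2; idx++)
			for (cpu = 0; cpu < NCPUS; cpu++)
				after[idx] += flipctr[cpu][idx];

		/* per-index sums are unchanged, so grace-period accounting is too */
		assert(before[0] == after[0] && before[1] == after[1]);
		printf("sums: %d %d\n", after[0], after[1]);
		return 0;
	}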