Commit 2232c2d8e0a6a31061dec311f3d1cf7624bc14f1

Authored by Steven Rostedt
Committed by Ingo Molnar
1 parent c0f4133b8f

rcu: add support for dynamic ticks and preempt rcu

PREEMPT-RCU can get stuck if a CPU goes idle while NO_HZ is set. The
idle CPU does not push RCU through its grace period, so a
synchronize_rcu() may block forever. Without this patch I have a box that
will not boot when PREEMPT_RCU and NO_HZ are set; that same box boots
fine with this patch.

This patch comes from the -rt kernel, where it has been tested for
several months.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 6 changed files with 259 additions and 4 deletions

include/linux/hardirq.h
... ... @@ -109,6 +109,14 @@
109 109 }
110 110 #endif
111 111  
  112 +#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
  113 +extern void rcu_irq_enter(void);
  114 +extern void rcu_irq_exit(void);
  115 +#else
  116 +# define rcu_irq_enter() do { } while (0)
  117 +# define rcu_irq_exit() do { } while (0)
  118 +#endif /* CONFIG_PREEMPT_RCU */
  119 +
112 120 /*
113 121 * It is safe to do non-atomic ops on ->hardirq_context,
114 122 * because NMI handlers may not preempt and the ops are
... ... @@ -117,6 +125,7 @@
117 125 */
118 126 #define __irq_enter() \
119 127 do { \
  128 + rcu_irq_enter(); \
120 129 account_system_vtime(current); \
121 130 add_preempt_count(HARDIRQ_OFFSET); \
122 131 trace_hardirq_enter(); \
... ... @@ -135,6 +144,7 @@
135 144 trace_hardirq_exit(); \
136 145 account_system_vtime(current); \
137 146 sub_preempt_count(HARDIRQ_OFFSET); \
  147 + rcu_irq_exit(); \
138 148 } while (0)
139 149  
140 150 /*
include/linux/rcuclassic.h
... ... @@ -160,6 +160,9 @@
160 160 extern long rcu_batches_completed(void);
161 161 extern long rcu_batches_completed_bh(void);
162 162  
  163 +#define rcu_enter_nohz() do { } while (0)
  164 +#define rcu_exit_nohz() do { } while (0)
  165 +
163 166 #endif /* __KERNEL__ */
164 167 #endif /* __LINUX_RCUCLASSIC_H */
include/linux/rcupreempt.h
... ... @@ -82,6 +82,28 @@
82 82  
83 83 struct softirq_action;
84 84  
  85 +#ifdef CONFIG_NO_HZ
  86 +DECLARE_PER_CPU(long, dynticks_progress_counter);
  87 +
  88 +static inline void rcu_enter_nohz(void)
  89 +{
  90 + __get_cpu_var(dynticks_progress_counter)++;
  91 + WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1);
  92 + mb();
  93 +}
  94 +
  95 +static inline void rcu_exit_nohz(void)
  96 +{
  97 + mb();
  98 + __get_cpu_var(dynticks_progress_counter)++;
  99 + WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1));
  100 +}
  101 +
  102 +#else /* CONFIG_NO_HZ */
  103 +#define rcu_enter_nohz() do { } while (0)
  104 +#define rcu_exit_nohz() do { } while (0)
  105 +#endif /* CONFIG_NO_HZ */
  106 +
85 107 #endif /* __KERNEL__ */
86 108 #endif /* __LINUX_RCUPREEMPT_H */
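
The even/odd discipline encoded by rcu_enter_nohz() and rcu_exit_nohz() above is easy to model in isolation. The following stand-alone user-space sketch is an illustration only (single-threaded, so the kernel's mb() calls degenerate to comments; the model_* names are invented for this example): the per-CPU counter starts odd while the CPU is active, goes even when the CPU enters dynticks idle, and goes odd again when it leaves, so a sampled even value tells a remote observer that the CPU cannot be inside an RCU read-side critical section.

#include <assert.h>
#include <stdio.h>

static long dynticks_progress_counter = 1;	/* odd: CPU starts out active */

static void model_rcu_enter_nohz(void)
{
	dynticks_progress_counter++;		/* odd -> even: ticks stopped */
	assert((dynticks_progress_counter & 0x1) == 0);
	/* the kernel issues mb() here; a single-threaded model needs none */
}

static void model_rcu_exit_nohz(void)
{
	/* the kernel issues mb() here; a single-threaded model needs none */
	dynticks_progress_counter++;		/* even -> odd: ticks restarted */
	assert(dynticks_progress_counter & 0x1);
}

int main(void)
{
	model_rcu_enter_nohz();			/* CPU goes idle, tick stopped */
	printf("idle:   counter=%ld (even => quiescent)\n",
	       dynticks_progress_counter);
	model_rcu_exit_nohz();			/* CPU resumes, tick restarted */
	printf("active: counter=%ld (odd  => must be waited on)\n",
	       dynticks_progress_counter);
	return 0;
}

Compiled with a plain cc, the model prints the counter parity after each transition; the real functions additionally use memory barriers to order the counter updates against the read-side critical sections around them.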
kernel/rcupreempt.c
... ... @@ -23,6 +23,10 @@
23 23 * to Suparna Bhattacharya for pushing me completely away
24 24 * from atomic instructions on the read side.
25 25 *
  26 + * - Added handling of Dynamic Ticks
  27 + * Copyright 2007 - Paul E. McKenney <paulmck@us.ibm.com>
  28 + * - Steven Rostedt <srostedt@redhat.com>
  29 + *
26 30 * Papers: http://www.rdrop.com/users/paulmck/RCU
27 31 *
28 32 * Design Document: http://lwn.net/Articles/253651/
... ... @@ -409,6 +413,212 @@
409 413 }
410 414 }
411 415  
  416 +#ifdef CONFIG_NO_HZ
  417 +
  418 +DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
  419 +static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
  420 +static DEFINE_PER_CPU(int, rcu_update_flag);
  421 +
  422 +/**
  423 + * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
  424 + *
  425 + * If the CPU was idle with dynamic ticks active, this updates the
  426 + * dynticks_progress_counter to let the RCU handling know that the
  427 + * CPU is active.
  428 + */
  429 +void rcu_irq_enter(void)
  430 +{
  431 + int cpu = smp_processor_id();
  432 +
  433 + if (per_cpu(rcu_update_flag, cpu))
  434 + per_cpu(rcu_update_flag, cpu)++;
  435 +
  436 + /*
  437 + * Only update if we are coming from a stopped ticks mode
  438 + * (dynticks_progress_counter is even).
  439 + */
  440 + if (!in_interrupt() &&
  441 + (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
  442 + /*
  443 + * The following might seem like we could have a race
  444 + * with NMI/SMIs. But this really isn't a problem.
  445 + * Here we do a read/modify/write, and the race happens
  446 + * when an NMI/SMI comes in after the read and before
  447 + * the write. But NMI/SMIs will increment this counter
  448 + * twice before returning, so the zero bit will not
  449 + * be corrupted by the NMI/SMI, which is the most important
  450 + * part.
  451 + *
  452 + * The only thing is that we would bring back the counter
  453 + * to a position that it was in during the NMI/SMI.
  454 + * But the zero bit would be set, so the rest of the
  455 + * counter would again be ignored.
  456 + *
  457 + * On return from the IRQ, the zero bit of the counter may
  458 + * be 0 and the counter value the same as at the return from
  459 + * the NMI/SMI. If the state machine was so unlucky as to
  460 + * see that, it still doesn't matter, since all
  461 + * RCU read-side critical sections on this CPU would
  462 + * have already completed.
  463 + */
  464 + per_cpu(dynticks_progress_counter, cpu)++;
  465 + /*
  466 + * The following memory barrier ensures that any
  467 + * rcu_read_lock() primitives in the irq handler
  468 + * are seen by other CPUs to follow the above
  469 + * increment to dynticks_progress_counter. This is
  470 + * required in order for other CPUs to correctly
  471 + * determine when it is safe to advance the RCU
  472 + * grace-period state machine.
  473 + */
  474 + smp_mb(); /* see above block comment. */
  475 + /*
  476 + * Since we can't determine the dynamic tick mode from
  477 + * the dynticks_progress_counter after this routine,
  478 + * we use a second flag to acknowledge that we came
  479 + * from an idle state with ticks stopped.
  480 + */
  481 + per_cpu(rcu_update_flag, cpu)++;
  482 + /*
  483 + * If we take an NMI/SMI now, they will also increment
  484 + * the rcu_update_flag, and will not update the
  485 + * dynticks_progress_counter on exit. That is for
  486 + * this IRQ to do.
  487 + */
  488 + }
  489 +}
  490 +
  491 +/**
  492 + * rcu_irq_exit - Called from exiting Hard irq context.
  493 + *
  494 + * If the CPU was idle with dynamic ticks active, update the
  495 + * dynticks_progress_counter to let the RCU handling be
  496 + * aware that the CPU is going back to idle with no ticks.
  497 + */
  498 +void rcu_irq_exit(void)
  499 +{
  500 + int cpu = smp_processor_id();
  501 +
  502 + /*
  503 + * rcu_update_flag is set if we interrupted the CPU
  504 + * when it was idle with ticks stopped.
  505 + * Once this occurs, we keep track of interrupt nesting
  506 + * because a NMI/SMI could also come in, and we still
  507 + * only want the IRQ that started the increment of the
  508 + * dynticks_progress_counter to be the one that modifies
  509 + * it on exit.
  510 + */
  511 + if (per_cpu(rcu_update_flag, cpu)) {
  512 + if (--per_cpu(rcu_update_flag, cpu))
  513 + return;
  514 +
  515 + /* This must match the interrupt nesting */
  516 + WARN_ON(in_interrupt());
  517 +
  518 + /*
  519 + * If an NMI/SMI happens now we are still
  520 + * protected by the dynticks_progress_counter being odd.
  521 + */
  522 +
  523 + /*
  524 + * The following memory barrier ensures that any
  525 + * rcu_read_unlock() primitives in the irq handler
  526 + * are seen by other CPUs to precede the following
  527 + * increment to dynticks_progress_counter. This
  528 + * is required in order for other CPUs to determine
  529 + * when it is safe to advance the RCU grace-period
  530 + * state machine.
  531 + */
  532 + smp_mb(); /* see above block comment. */
  533 + per_cpu(dynticks_progress_counter, cpu)++;
  534 + WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
  535 + }
  536 +}
  537 +
  538 +static void dyntick_save_progress_counter(int cpu)
  539 +{
  540 + per_cpu(rcu_dyntick_snapshot, cpu) =
  541 + per_cpu(dynticks_progress_counter, cpu);
  542 +}
  543 +
  544 +static inline int
  545 +rcu_try_flip_waitack_needed(int cpu)
  546 +{
  547 + long curr;
  548 + long snap;
  549 +
  550 + curr = per_cpu(dynticks_progress_counter, cpu);
  551 + snap = per_cpu(rcu_dyntick_snapshot, cpu);
  552 + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
  553 +
  554 + /*
  555 + * If the CPU remained in dynticks mode for the entire time
  556 + * and didn't take any interrupts, NMIs, SMIs, or whatever,
  557 + * then it cannot be in the middle of an rcu_read_lock(), so
  558 + * the next rcu_read_lock() it executes must use the new value
  559 + * of the counter. So we can safely pretend that this CPU
  560 + * already acknowledged the counter.
  561 + */
  562 +
  563 + if ((curr == snap) && ((curr & 0x1) == 0))
  564 + return 0;
  565 +
  566 + /*
  567 + * If the CPU passed through or entered a dynticks idle phase with
  568 + * no active irq handlers, then, as above, we can safely pretend
  569 + * that this CPU already acknowledged the counter.
  570 + */
  571 +
  572 + if ((curr - snap) > 2 || (snap & 0x1) == 0)
  573 + return 0;
  574 +
  575 + /* We need this CPU to explicitly acknowledge the counter flip. */
  576 +
  577 + return 1;
  578 +}
  579 +
  580 +static inline int
  581 +rcu_try_flip_waitmb_needed(int cpu)
  582 +{
  583 + long curr;
  584 + long snap;
  585 +
  586 + curr = per_cpu(dynticks_progress_counter, cpu);
  587 + snap = per_cpu(rcu_dyntick_snapshot, cpu);
  588 + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
  589 +
  590 + /*
  591 + * If the CPU remained in dynticks mode for the entire time
  592 + * and didn't take any interrupts, NMIs, SMIs, or whatever,
  593 + * then it cannot have executed an RCU read-side critical section
  594 + * during that time, so there is no need for it to execute a
  595 + * memory barrier.
  596 + */
  597 +
  598 + if ((curr == snap) && ((curr & 0x1) == 0))
  599 + return 0;
  600 +
  601 + /*
  602 + * If the CPU either entered or exited an outermost interrupt,
  603 + * SMI, NMI, or whatever handler, then we know that it executed
  604 + * a memory barrier when doing so. So we don't need another one.
  605 + */
  606 + if (curr != snap)
  607 + return 0;
  608 +
  609 + /* We need the CPU to execute a memory barrier. */
  610 +
  611 + return 1;
  612 +}
  613 +
  614 +#else /* !CONFIG_NO_HZ */
  615 +
  616 +# define dyntick_save_progress_counter(cpu) do { } while (0)
  617 +# define rcu_try_flip_waitack_needed(cpu) (1)
  618 +# define rcu_try_flip_waitmb_needed(cpu) (1)
  619 +
  620 +#endif /* CONFIG_NO_HZ */
  621 +
412 622 /*
413 623 * Get here when RCU is idle. Decide whether we need to
414 624 * move out of idle state, and return non-zero if so.
415 625  
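
The nesting rule that rcu_irq_enter()/rcu_irq_exit() enforce through rcu_update_flag can also be modelled in a few lines of ordinary C. The sketch below is an illustration under simplifying assumptions (one CPU, no real interrupts or concurrency, irq_nesting standing in for in_interrupt(), and all model_* names invented): only the outermost handler that finds the CPU in dynticks-idle mode flips the counter to odd, nested NMI/SMI handlers merely bump the flag, and the counter returns to even exactly once, when that outermost handler exits.

#include <assert.h>
#include <stdio.h>

static long counter = 2;	/* even: CPU is in dynticks idle    */
static int update_flag;		/* models per-CPU rcu_update_flag   */
static int irq_nesting;		/* models in_interrupt() depth      */

static void model_rcu_irq_enter(void)
{
	if (update_flag)
		update_flag++;		/* nested handler: just count it        */
	if (irq_nesting == 0 && (counter & 0x1) == 0) {
		counter++;		/* even -> odd: CPU is now "active"     */
		update_flag++;		/* remember that we did the increment   */
	}
	irq_nesting++;
}

static void model_rcu_irq_exit(void)
{
	irq_nesting--;
	if (update_flag && --update_flag == 0) {
		counter++;		/* odd -> even: back to dynticks idle   */
		assert((counter & 0x1) == 0);
	}
}

int main(void)
{
	model_rcu_irq_enter();		/* IRQ hits the idle CPU            */
	model_rcu_irq_enter();		/*   NMI arrives inside the IRQ     */
	model_rcu_irq_exit();		/*   NMI returns: counter untouched */
	printf("after nested NMI: counter=%ld (still odd)\n", counter);
	model_rcu_irq_exit();		/* outermost IRQ returns            */
	printf("after IRQ exit:   counter=%ld (even again)\n", counter);
	return 0;
}
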
... ... @@ -447,8 +657,10 @@
447 657  
448 658 /* Now ask each CPU for acknowledgement of the flip. */
449 659  
450   - for_each_cpu_mask(cpu, rcu_cpu_online_map)
  660 + for_each_cpu_mask(cpu, rcu_cpu_online_map) {
451 661 per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
  662 + dyntick_save_progress_counter(cpu);
  663 + }
452 664  
453 665 return 1;
454 666 }
... ... @@ -464,7 +676,8 @@
464 676  
465 677 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
466 678 for_each_cpu_mask(cpu, rcu_cpu_online_map)
467   - if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
  679 + if (rcu_try_flip_waitack_needed(cpu) &&
  680 + per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
468 681 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
469 682 return 0;
470 683 }
471 684  
... ... @@ -509,8 +722,10 @@
509 722 smp_mb(); /* ^^^^^^^^^^^^ */
510 723  
511 724 /* Call for a memory barrier from each CPU. */
512   - for_each_cpu_mask(cpu, rcu_cpu_online_map)
  725 + for_each_cpu_mask(cpu, rcu_cpu_online_map) {
513 726 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
  727 + dyntick_save_progress_counter(cpu);
  728 + }
514 729  
515 730 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
516 731 return 1;
... ... @@ -528,7 +743,8 @@
528 743  
529 744 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
530 745 for_each_cpu_mask(cpu, rcu_cpu_online_map)
531   - if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
  746 + if (rcu_try_flip_waitmb_needed(cpu) &&
  747 + per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
532 748 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
533 749 return 0;
534 750 }
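
The two loops changed above follow the same pattern: snapshot each CPU's dynticks_progress_counter when requesting an acknowledgement or a memory barrier, then, on each later poll, skip CPUs whose counter shows they were (or have since passed through) dynticks idle. A minimal stand-alone model of the acknowledgement test is sketched below; ack_needed() mirrors the logic of rcu_try_flip_waitack_needed() but is an illustration only, not kernel code.

#include <stdio.h>

static int ack_needed(long curr, long snap)
{
	/* Idle the whole time (even and unchanged): no ack needed. */
	if (curr == snap && (curr & 0x1) == 0)
		return 0;
	/* Entered or passed through idle since the snapshot: no ack needed. */
	if ((curr - snap) > 2 || (snap & 0x1) == 0)
		return 0;
	/* Stayed busy: the CPU must acknowledge the flip itself. */
	return 1;
}

int main(void)
{
	printf("%d\n", ack_needed(8, 8));	/* 0: idle throughout       */
	printf("%d\n", ack_needed(9, 9));	/* 1: busy, never went idle */
	printf("%d\n", ack_needed(12, 9));	/* 0: went idle in between  */
	return 0;
}

The three calls in main() cover the three cases the comments above describe: idle throughout, busy throughout, and passed through idle since the snapshot was taken.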
kernel/softirq.c
... ... @@ -313,6 +313,7 @@
313 313 /* Make sure that timer wheel updates are propagated */
314 314 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
315 315 tick_nohz_stop_sched_tick();
  316 + rcu_irq_exit();
316 317 #endif
317 318 preempt_enable_no_resched();
318 319 }
kernel/time/tick-sched.c
... ... @@ -282,6 +282,7 @@
282 282 ts->idle_tick = ts->sched_timer.expires;
283 283 ts->tick_stopped = 1;
284 284 ts->idle_jiffies = last_jiffies;
  285 + rcu_enter_nohz();
285 286 }
286 287  
287 288 /*
... ... @@ -374,6 +375,8 @@
374 375 local_irq_enable();
375 376 return;
376 377 }
  378 +
  379 + rcu_exit_nohz();
377 380  
378 381 /* Update jiffies first */
379 382 select_nohz_load_balancer(0);
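
Putting the hooks together, one CPU's counter is expected to move through the following parity sequence as it stops the tick, services an interrupt while idle, and finally restarts the tick. The trace below is an illustration only; every hook is reduced to a bare counter increment and no kernel code is involved.

#include <stdio.h>

int main(void)
{
	long counter = 1;	/* odd: CPU active, tick running */

	counter++;	/* rcu_enter_nohz():  tick_nohz_stop_sched_tick()    */
	printf("%ld  dynticks idle (even)\n", counter);
	counter++;	/* rcu_irq_enter():   interrupt hits the idle CPU    */
	printf("%ld  in irq handler (odd)\n", counter);
	counter++;	/* rcu_irq_exit():    handler done, idle again       */
	printf("%ld  dynticks idle (even)\n", counter);
	counter++;	/* rcu_exit_nohz():   tick_nohz_restart_sched_tick() */
	printf("%ld  active again (odd)\n", counter);
	return 0;
}

At every even value the CPU is guaranteed not to be inside an RCU read-side critical section, which is what lets the grace-period state machine skip it without waking it up.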