Commit e260be673a15b6125068270e0216a3bfbfc12f87
Committed by: Ingo Molnar
1 parent: e0ecfa7917
Exists in: master and 4 other branches
Preempt-RCU: implementation
This patch implements a new version of RCU which allows its read-side critical sections to be preempted. It uses a set of counter pairs to keep track of the read-side critical sections and flips them when all tasks have exited their read-side critical sections. The details of this implementation can be found in this paper -

http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf

and the article -

http://lwn.net/Articles/253651/

This patch was developed as a part of the -rt kernel development and is meant to provide better latencies when read-side critical sections of RCU don't disable preemption. As a consequence of keeping track of RCU readers, the readers have a slight overhead (optimizations in the paper). This implementation co-exists with the "classic" RCU implementation and can be switched to at compile time.

Also includes RCU tracing summarized in debugfs.

[ akpm@linux-foundation.org: build fixes on non-preempt architectures ]

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
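The user-visible RCU API is unchanged by this patch; only the machinery behind rcu_read_lock()/rcu_read_unlock() and call_rcu() differs when CONFIG_PREEMPT_RCU is selected. As a point of reference, a minimal, hypothetical reader/updater pair (struct my_data, my_global, my_read(), my_update() and my_free() are invented names, not part of this patch) looks roughly like this:

/*
 * Hypothetical example, not part of this commit: one RCU-protected pointer,
 * one reader, one updater.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_data {
	int value;
	struct rcu_head rcu;
};

static struct my_data *my_global;

static int my_read(void)
{
	struct my_data *p;
	int val;

	rcu_read_lock();			/* may be preempted under PREEMPT_RCU */
	p = rcu_dereference(my_global);
	val = p ? p->value : -1;
	rcu_read_unlock();
	return val;
}

static void my_free(struct rcu_head *head)
{
	kfree(container_of(head, struct my_data, rcu));
}

static void my_update(int value)
{
	struct my_data *newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	struct my_data *oldp;

	if (!newp)
		return;
	newp->value = value;
	oldp = my_global;
	rcu_assign_pointer(my_global, newp);	/* publish the new version */
	if (oldp)
		call_rcu(&oldp->rcu, my_free);	/* free old version after a grace period */
}

Under CONFIG_PREEMPT_RCU the reader above may sleep due to preemption; the grace period that delays my_free() is then detected by the counter-flip state machine added in kernel/rcupreempt.c below.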
Showing 12 changed files with 1394 additions and 7 deletions
include/linux/rcuclassic.h
... | ... | @@ -157,6 +157,9 @@ |
157 | 157 | extern void rcu_check_callbacks(int cpu, int user); |
158 | 158 | extern void rcu_restart_cpu(int cpu); |
159 | 159 | |
160 | +extern long rcu_batches_completed(void); | |
161 | +extern long rcu_batches_completed_bh(void); | |
162 | + | |
160 | 163 | #endif /* __KERNEL__ */ |
161 | 164 | #endif /* __LINUX_RCUCLASSIC_H */ |
include/linux/rcupdate.h
... | ... | @@ -53,7 +53,11 @@ |
53 | 53 | void (*func)(struct rcu_head *head); |
54 | 54 | }; |
55 | 55 | |
56 | +#ifdef CONFIG_CLASSIC_RCU | |
56 | 57 | #include <linux/rcuclassic.h> |
58 | +#else /* #ifdef CONFIG_CLASSIC_RCU */ | |
59 | +#include <linux/rcupreempt.h> | |
60 | +#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ | |
57 | 61 | |
58 | 62 | #define RCU_HEAD_INIT { .next = NULL, .func = NULL } |
59 | 63 | #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT |
60 | 64 | |
... | ... | @@ -231,13 +235,12 @@ |
231 | 235 | /* Exported common interfaces */ |
232 | 236 | extern void synchronize_rcu(void); |
233 | 237 | extern void rcu_barrier(void); |
238 | +extern long rcu_batches_completed(void); | |
239 | +extern long rcu_batches_completed_bh(void); | |
234 | 240 | |
235 | 241 | /* Internal to kernel */ |
236 | 242 | extern void rcu_init(void); |
237 | -extern void rcu_check_callbacks(int cpu, int user); | |
238 | - | |
239 | -extern long rcu_batches_completed(void); | |
240 | -extern long rcu_batches_completed_bh(void); | |
243 | +extern int rcu_needs_cpu(int cpu); | |
241 | 244 | |
242 | 245 | #endif /* __KERNEL__ */ |
243 | 246 | #endif /* __LINUX_RCUPDATE_H */ |
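rcu_batches_completed() and rcu_batches_completed_bh() move into the common interface here so that callers such as rcutorture can observe grace-period progress with either RCU implementation. A hedged sketch of that kind of use (wait_one_more_batch() is an invented helper, shown for illustration only):

#include <linux/rcupdate.h>
#include <linux/delay.h>

/* Illustration only: poll until at least one further RCU batch completes.
 * Real code would normally just call synchronize_rcu() instead. */
static void wait_one_more_batch(void)
{
	long start = rcu_batches_completed();

	while (rcu_batches_completed() == start)
		msleep(10);	/* give the grace-period machinery time to advance */
}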
include/linux/rcupreempt.h
1 | +/* | |
2 | + * Read-Copy Update mechanism for mutual exclusion (RT implementation) | |
3 | + * | |
4 | + * This program is free software; you can redistribute it and/or modify | |
5 | + * it under the terms of the GNU General Public License as published by | |
6 | + * the Free Software Foundation; either version 2 of the License, or | |
7 | + * (at your option) any later version. | |
8 | + * | |
9 | + * This program is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | + * GNU General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU General Public License | |
15 | + * along with this program; if not, write to the Free Software | |
16 | + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | + * | |
18 | + * Copyright (C) IBM Corporation, 2006 | |
19 | + * | |
20 | + * Author: Paul McKenney <paulmck@us.ibm.com> | |
21 | + * | |
22 | + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> | |
23 | + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | |
24 | + * Papers: | |
25 | + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf | |
26 | + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) | |
27 | + * | |
28 | + * For detailed explanation of Read-Copy Update mechanism see - | |
29 | + * Documentation/RCU | |
30 | + * | |
31 | + */ | |
32 | + | |
33 | +#ifndef __LINUX_RCUPREEMPT_H | |
34 | +#define __LINUX_RCUPREEMPT_H | |
35 | + | |
36 | +#ifdef __KERNEL__ | |
37 | + | |
38 | +#include <linux/cache.h> | |
39 | +#include <linux/spinlock.h> | |
40 | +#include <linux/threads.h> | |
41 | +#include <linux/percpu.h> | |
42 | +#include <linux/cpumask.h> | |
43 | +#include <linux/seqlock.h> | |
44 | + | |
45 | +#define rcu_qsctr_inc(cpu) | |
46 | +#define rcu_bh_qsctr_inc(cpu) | |
47 | +#define call_rcu_bh(head, rcu) call_rcu(head, rcu) | |
48 | + | |
49 | +extern void __rcu_read_lock(void); | |
50 | +extern void __rcu_read_unlock(void); | |
51 | +extern int rcu_pending(int cpu); | |
52 | +extern int rcu_needs_cpu(int cpu); | |
53 | + | |
54 | +#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } | |
55 | +#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } | |
56 | + | |
57 | +extern void __synchronize_sched(void); | |
58 | + | |
59 | +extern void __rcu_init(void); | |
60 | +extern void rcu_check_callbacks(int cpu, int user); | |
61 | +extern void rcu_restart_cpu(int cpu); | |
62 | +extern long rcu_batches_completed(void); | |
63 | + | |
64 | +/* | |
65 | + * Return the number of RCU batches processed thus far. Useful for debug | |
66 | + * and statistics. The _bh variant is identical to straight RCU. | |
67 | + */ | |
68 | +static inline long rcu_batches_completed_bh(void) | |
69 | +{ | |
70 | + return rcu_batches_completed(); | |
71 | +} | |
72 | + | |
73 | +#ifdef CONFIG_RCU_TRACE | |
74 | +struct rcupreempt_trace; | |
75 | +extern long *rcupreempt_flipctr(int cpu); | |
76 | +extern long rcupreempt_data_completed(void); | |
77 | +extern int rcupreempt_flip_flag(int cpu); | |
78 | +extern int rcupreempt_mb_flag(int cpu); | |
79 | +extern char *rcupreempt_try_flip_state_name(void); | |
80 | +extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); | |
81 | +#endif | |
82 | + | |
83 | +struct softirq_action; | |
84 | + | |
85 | +#endif /* __KERNEL__ */ | |
86 | +#endif /* __LINUX_RCUPREEMPT_H */ |
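The declarations above hide the central idea: each CPU keeps a pair of counters (rcu_flipctr[2] in kernel/rcupreempt.c below) indexed by the low bit of the global grace-period count, and each task remembers which index it used so that rcu_read_unlock() decrements the right counter even after a migration. A deliberately simplified, single-CPU model of that fast path, ignoring the irq/NMI handling that the real __rcu_read_lock()/__rcu_read_unlock() perform (all model_* names are invented):

/* Simplified model only -- the real implementation is kernel/rcupreempt.c. */
struct model_cpu {
	long flipctr[2];	/* counter pair, indexed by grace-period parity */
};

static long model_completed;		/* stands in for rcu_ctrlblk.completed */
static struct model_cpu model_this_cpu;	/* stands in for this CPU's rcu_data */
static int model_nesting;		/* stands in for t->rcu_read_lock_nesting */
static int model_idx;			/* stands in for t->rcu_flipctr_idx */

static void model_read_lock(void)
{
	if (model_nesting++ == 0) {
		model_idx = model_completed & 0x1;	/* pick the current counter */
		model_this_cpu.flipctr[model_idx]++;	/* mark this reader in it */
	}
}

static void model_read_unlock(void)
{
	if (--model_nesting == 0)
		model_this_cpu.flipctr[model_idx]--;	/* release the counter chosen at lock time */
}

A grace period can end only after the sum of the "old" counters over all CPUs reaches zero, which is exactly what the waitzero state of the update-side state machine checks.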
include/linux/rcupreempt_trace.h
1 | +/* | |
2 | + * Read-Copy Update mechanism for mutual exclusion (RT implementation) | |
3 | + * | |
4 | + * This program is free software; you can redistribute it and/or modify | |
5 | + * it under the terms of the GNU General Public License as published by | |
6 | + * the Free Software Foundation; either version 2 of the License, or | |
7 | + * (at your option) any later version. | |
8 | + * | |
9 | + * This program is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | + * GNU General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU General Public License | |
15 | + * along with this program; if not, write to the Free Software | |
16 | + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | + * | |
18 | + * Copyright (C) IBM Corporation, 2006 | |
19 | + * | |
20 | + * Author: Paul McKenney <paulmck@us.ibm.com> | |
21 | + * | |
22 | + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com> | |
23 | + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | |
24 | + * Papers: | |
25 | + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf | |
26 | + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) | |
27 | + * | |
28 | + * For detailed explanation of the Preemptible Read-Copy Update mechanism see - | |
29 | + * http://lwn.net/Articles/253651/ | |
30 | + */ | |
31 | + | |
32 | +#ifndef __LINUX_RCUPREEMPT_TRACE_H | |
33 | +#define __LINUX_RCUPREEMPT_TRACE_H | |
34 | + | |
35 | +#ifdef __KERNEL__ | |
36 | +#include <linux/types.h> | |
37 | +#include <linux/kernel.h> | |
38 | + | |
39 | +#include <asm/atomic.h> | |
40 | + | |
41 | +/* | |
42 | + * PREEMPT_RCU data structures. | |
43 | + */ | |
44 | + | |
45 | +struct rcupreempt_trace { | |
46 | + long next_length; | |
47 | + long next_add; | |
48 | + long wait_length; | |
49 | + long wait_add; | |
50 | + long done_length; | |
51 | + long done_add; | |
52 | + long done_remove; | |
53 | + atomic_t done_invoked; | |
54 | + long rcu_check_callbacks; | |
55 | + atomic_t rcu_try_flip_1; | |
56 | + atomic_t rcu_try_flip_e1; | |
57 | + long rcu_try_flip_i1; | |
58 | + long rcu_try_flip_ie1; | |
59 | + long rcu_try_flip_g1; | |
60 | + long rcu_try_flip_a1; | |
61 | + long rcu_try_flip_ae1; | |
62 | + long rcu_try_flip_a2; | |
63 | + long rcu_try_flip_z1; | |
64 | + long rcu_try_flip_ze1; | |
65 | + long rcu_try_flip_z2; | |
66 | + long rcu_try_flip_m1; | |
67 | + long rcu_try_flip_me1; | |
68 | + long rcu_try_flip_m2; | |
69 | +}; | |
70 | + | |
71 | +#ifdef CONFIG_RCU_TRACE | |
72 | +#define RCU_TRACE(fn, arg) fn(arg); | |
73 | +#else | |
74 | +#define RCU_TRACE(fn, arg) | |
75 | +#endif | |
76 | + | |
77 | +extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); | |
78 | +extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); | |
79 | +extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); | |
80 | +extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); | |
81 | +extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); | |
82 | +extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); | |
83 | +extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); | |
84 | +extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); | |
85 | +extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); | |
86 | +extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); | |
87 | +extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); | |
88 | +extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); | |
89 | +extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); | |
90 | +extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); | |
91 | +extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); | |
92 | +extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); | |
93 | +extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); | |
94 | +extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); | |
95 | +extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); | |
96 | +extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); | |
97 | + | |
98 | +#endif /* __KERNEL__ */ | |
99 | +#endif /* __LINUX_RCUPREEMPT_TRACE_H */ |
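The RCU_TRACE() wrapper above is what lets every tracing hook compile away when CONFIG_RCU_TRACE is off; kernel/rcupreempt.c layers its RCU_TRACE_ME()/RCU_TRACE_RDP() helpers on top of it. A tiny illustration (example_hook() is an invented caller):

#include <linux/rcupreempt_trace.h>

static void example_hook(struct rcupreempt_trace *trace)
{
	/* With CONFIG_RCU_TRACE=y this expands to
	 * rcupreempt_trace_next_add(trace); with it unset the macro
	 * expands to nothing and the call disappears entirely. */
	RCU_TRACE(rcupreempt_trace_next_add, trace);
}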
include/linux/sched.h
... | ... | @@ -974,6 +974,11 @@ |
974 | 974 | int nr_cpus_allowed; |
975 | 975 | unsigned int time_slice; |
976 | 976 | |
977 | +#ifdef CONFIG_PREEMPT_RCU | |
978 | + int rcu_read_lock_nesting; | |
979 | + int rcu_flipctr_idx; | |
980 | +#endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
981 | + | |
977 | 982 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
978 | 983 | struct sched_info sched_info; |
979 | 984 | #endif |
init/Kconfig
... | ... | @@ -763,4 +763,32 @@ |
763 | 763 | |
764 | 764 | config PREEMPT_NOTIFIERS |
765 | 765 | bool |
766 | + | |
767 | +choice | |
768 | + prompt "RCU implementation type:" | |
769 | + default CLASSIC_RCU | |
770 | + | |
771 | +config CLASSIC_RCU | |
772 | + bool "Classic RCU" | |
773 | + help | |
774 | + This option selects the classic RCU implementation that is | |
775 | + designed for best read-side performance on non-realtime | |
776 | + systems. | |
777 | + | |
778 | + Say Y if you are unsure. | |
779 | + | |
780 | +config PREEMPT_RCU | |
781 | + bool "Preemptible RCU" | |
782 | + depends on PREEMPT | |
783 | + help | |
784 | + This option reduces the latency of the kernel by making certain | |
785 | + RCU sections preemptible. Normally RCU code is non-preemptible; if | |
786 | + this option is selected, then read-only RCU sections become | |
787 | + preemptible. This helps latency, but may expose bugs due to | |
788 | + now-naive assumptions about each RCU read-side critical section | |
789 | + remaining on a given CPU through its execution. | |
790 | + | |
791 | + Say N if you are unsure. | |
792 | + | |
793 | +endchoice |
kernel/Kconfig.preempt
... | ... | @@ -62,4 +62,15 @@ |
62 | 62 | |
63 | 63 | Say Y here if you are building a kernel for a desktop system. |
64 | 64 | Say N if you are unsure. |
65 | + | |
66 | +config RCU_TRACE | |
67 | + bool "Enable tracing for RCU - currently stats in debugfs" | |
68 | + select DEBUG_FS | |
69 | + default y | |
70 | + help | |
71 | + This option provides tracing in RCU which presents stats | |
72 | + in debugfs for debugging the RCU implementation. | |
73 | + | |
74 | + Say Y here if you want to enable RCU tracing. | |
75 | + Say N if you are unsure. |
kernel/Makefile
... | ... | @@ -6,7 +6,7 @@ |
6 | 6 | exit.o itimer.o time.o softirq.o resource.o \ |
7 | 7 | sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ |
8 | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | - rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \ | |
9 | + rcupdate.o extable.o params.o posix-timers.o \ | |
10 | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | 11 | hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ |
12 | 12 | utsname.o notifier.o |
... | ... | @@ -52,6 +52,11 @@ |
52 | 52 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ |
53 | 53 | obj-$(CONFIG_SECCOMP) += seccomp.o |
54 | 54 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
55 | +obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o | |
56 | +obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o | |
57 | +ifeq ($(CONFIG_PREEMPT_RCU),y) | |
58 | +obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o | |
59 | +endif | |
55 | 60 | obj-$(CONFIG_RELAY) += relay.o |
56 | 61 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
57 | 62 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
kernel/fork.c
... | ... | @@ -1045,6 +1045,10 @@ |
1045 | 1045 | copy_flags(clone_flags, p); |
1046 | 1046 | INIT_LIST_HEAD(&p->children); |
1047 | 1047 | INIT_LIST_HEAD(&p->sibling); |
1048 | +#ifdef CONFIG_PREEMPT_RCU | |
1049 | + p->rcu_read_lock_nesting = 0; | |
1050 | + p->rcu_flipctr_idx = 0; | |
1051 | +#endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
1048 | 1052 | p->vfork_done = NULL; |
1049 | 1053 | spin_lock_init(&p->alloc_lock); |
1050 | 1054 |
kernel/rcuclassic.c
kernel/rcupreempt.c
1 | +/* | |
2 | + * Read-Copy Update mechanism for mutual exclusion, realtime implementation | |
3 | + * | |
4 | + * This program is free software; you can redistribute it and/or modify | |
5 | + * it under the terms of the GNU General Public License as published by | |
6 | + * the Free Software Foundation; either version 2 of the License, or | |
7 | + * (at your option) any later version. | |
8 | + * | |
9 | + * This program is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | + * GNU General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU General Public License | |
15 | + * along with this program; if not, write to the Free Software | |
16 | + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | + * | |
18 | + * Copyright IBM Corporation, 2006 | |
19 | + * | |
20 | + * Authors: Paul E. McKenney <paulmck@us.ibm.com> | |
21 | + * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar | |
22 | + * for pushing me away from locks and towards counters, and | |
23 | + * to Suparna Bhattacharya for pushing me completely away | |
24 | + * from atomic instructions on the read side. | |
25 | + * | |
26 | + * Papers: http://www.rdrop.com/users/paulmck/RCU | |
27 | + * | |
28 | + * Design Document: http://lwn.net/Articles/253651/ | |
29 | + * | |
30 | + * For detailed explanation of Read-Copy Update mechanism see - | |
31 | + * Documentation/RCU/ *.txt | |
32 | + * | |
33 | + */ | |
34 | +#include <linux/types.h> | |
35 | +#include <linux/kernel.h> | |
36 | +#include <linux/init.h> | |
37 | +#include <linux/spinlock.h> | |
38 | +#include <linux/smp.h> | |
39 | +#include <linux/rcupdate.h> | |
40 | +#include <linux/interrupt.h> | |
41 | +#include <linux/sched.h> | |
42 | +#include <asm/atomic.h> | |
43 | +#include <linux/bitops.h> | |
44 | +#include <linux/module.h> | |
45 | +#include <linux/completion.h> | |
46 | +#include <linux/moduleparam.h> | |
47 | +#include <linux/percpu.h> | |
48 | +#include <linux/notifier.h> | |
49 | +#include <linux/rcupdate.h> | |
50 | +#include <linux/cpu.h> | |
51 | +#include <linux/random.h> | |
52 | +#include <linux/delay.h> | |
53 | +#include <linux/byteorder/swabb.h> | |
54 | +#include <linux/cpumask.h> | |
55 | +#include <linux/rcupreempt_trace.h> | |
56 | + | |
57 | +/* | |
58 | + * Macro that prevents the compiler from reordering accesses, but does | |
59 | + * absolutely -nothing- to prevent CPUs from reordering. This is used | |
60 | + * only to mediate communication between mainline code and hardware | |
61 | + * interrupt and NMI handlers. | |
62 | + */ | |
63 | +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) | |
64 | + | |
65 | +/* | |
66 | + * PREEMPT_RCU data structures. | |
67 | + */ | |
68 | + | |
69 | +/* | |
70 | + * GP_STAGES specifies the number of times the state machine has | |
71 | + * to go through the all the rcu_try_flip_states (see below) | |
72 | + * in a single Grace Period. | |
73 | + * | |
74 | + * GP in GP_STAGES stands for Grace Period ;) | |
75 | + */ | |
76 | +#define GP_STAGES 2 | |
77 | +struct rcu_data { | |
78 | + spinlock_t lock; /* Protect rcu_data fields. */ | |
79 | + long completed; /* Number of last completed batch. */ | |
80 | + int waitlistcount; | |
81 | + struct tasklet_struct rcu_tasklet; | |
82 | + struct rcu_head *nextlist; | |
83 | + struct rcu_head **nexttail; | |
84 | + struct rcu_head *waitlist[GP_STAGES]; | |
85 | + struct rcu_head **waittail[GP_STAGES]; | |
86 | + struct rcu_head *donelist; | |
87 | + struct rcu_head **donetail; | |
88 | + long rcu_flipctr[2]; | |
89 | +#ifdef CONFIG_RCU_TRACE | |
90 | + struct rcupreempt_trace trace; | |
91 | +#endif /* #ifdef CONFIG_RCU_TRACE */ | |
92 | +}; | |
93 | + | |
94 | +/* | |
95 | + * States for rcu_try_flip() and friends. | |
96 | + */ | |
97 | + | |
98 | +enum rcu_try_flip_states { | |
99 | + | |
100 | + /* | |
101 | + * Stay here if nothing is happening. Flip the counter if something | |
102 | + * starts happening. Denoted by "I". | |
103 | + */ | |
104 | + rcu_try_flip_idle_state, | |
105 | + | |
106 | + /* | |
107 | + * Wait here for all CPUs to notice that the counter has flipped. This | |
108 | + * prevents the old set of counters from ever being incremented once | |
109 | + * we leave this state, which in turn is necessary because we cannot | |
110 | + * test any individual counter for zero -- we can only check the sum. | |
111 | + * Denoted by "A". | |
112 | + */ | |
113 | + rcu_try_flip_waitack_state, | |
114 | + | |
115 | + /* | |
116 | + * Wait here for the sum of the old per-CPU counters to reach zero. | |
117 | + * Denoted by "Z". | |
118 | + */ | |
119 | + rcu_try_flip_waitzero_state, | |
120 | + | |
121 | + /* | |
122 | + * Wait here for each of the other CPUs to execute a memory barrier. | |
123 | + * This is necessary to ensure that these other CPUs really have | |
124 | + * completed executing their RCU read-side critical sections, despite | |
125 | + * their CPUs wildly reordering memory. Denoted by "M". | |
126 | + */ | |
127 | + rcu_try_flip_waitmb_state, | |
128 | +}; | |
129 | + | |
130 | +struct rcu_ctrlblk { | |
131 | + spinlock_t fliplock; /* Protect state-machine transitions. */ | |
132 | + long completed; /* Number of last completed batch. */ | |
133 | + enum rcu_try_flip_states rcu_try_flip_state; /* The current state of | |
134 | + the rcu state machine */ | |
135 | +}; | |
136 | + | |
137 | +static DEFINE_PER_CPU(struct rcu_data, rcu_data); | |
138 | +static struct rcu_ctrlblk rcu_ctrlblk = { | |
139 | + .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), | |
140 | + .completed = 0, | |
141 | + .rcu_try_flip_state = rcu_try_flip_idle_state, | |
142 | +}; | |
143 | + | |
144 | + | |
145 | +#ifdef CONFIG_RCU_TRACE | |
146 | +static char *rcu_try_flip_state_names[] = | |
147 | + { "idle", "waitack", "waitzero", "waitmb" }; | |
148 | +#endif /* #ifdef CONFIG_RCU_TRACE */ | |
149 | + | |
150 | +/* | |
151 | + * Enum and per-CPU flag to determine when each CPU has seen | |
152 | + * the most recent counter flip. | |
153 | + */ | |
154 | + | |
155 | +enum rcu_flip_flag_values { | |
156 | + rcu_flip_seen, /* Steady/initial state, last flip seen. */ | |
157 | + /* Only GP detector can update. */ | |
158 | + rcu_flipped /* Flip just completed, need confirmation. */ | |
159 | + /* Only corresponding CPU can update. */ | |
160 | +}; | |
161 | +static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag) | |
162 | + = rcu_flip_seen; | |
163 | + | |
164 | +/* | |
165 | + * Enum and per-CPU flag to determine when each CPU has executed the | |
166 | + * needed memory barrier to fence in memory references from its last RCU | |
167 | + * read-side critical section in the just-completed grace period. | |
168 | + */ | |
169 | + | |
170 | +enum rcu_mb_flag_values { | |
171 | + rcu_mb_done, /* Steady/initial state, no mb()s required. */ | |
172 | + /* Only GP detector can update. */ | |
173 | + rcu_mb_needed /* Flip just completed, need an mb(). */ | |
174 | + /* Only corresponding CPU can update. */ | |
175 | +}; | |
176 | +static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) | |
177 | + = rcu_mb_done; | |
178 | + | |
179 | +/* | |
180 | + * RCU_DATA_ME: find the current CPU's rcu_data structure. | |
181 | + * RCU_DATA_CPU: find the specified CPU's rcu_data structure. | |
182 | + */ | |
183 | +#define RCU_DATA_ME() (&__get_cpu_var(rcu_data)) | |
184 | +#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu)) | |
185 | + | |
186 | +/* | |
187 | + * Helper macro for tracing when the appropriate rcu_data is not | |
188 | + * cached in a local variable, but where the CPU number is so cached. | |
189 | + */ | |
190 | +#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace)); | |
191 | + | |
192 | +/* | |
193 | + * Helper macro for tracing when the appropriate rcu_data is not | |
194 | + * cached in a local variable. | |
195 | + */ | |
196 | +#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace)); | |
197 | + | |
198 | +/* | |
199 | + * Helper macro for tracing when the appropriate rcu_data is pointed | |
200 | + * to by a local variable. | |
201 | + */ | |
202 | +#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); | |
203 | + | |
204 | +/* | |
205 | + * Return the number of RCU batches processed thus far. Useful | |
206 | + * for debug and statistics. | |
207 | + */ | |
208 | +long rcu_batches_completed(void) | |
209 | +{ | |
210 | + return rcu_ctrlblk.completed; | |
211 | +} | |
212 | +EXPORT_SYMBOL_GPL(rcu_batches_completed); | |
213 | + | |
214 | +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
215 | + | |
216 | +void __rcu_read_lock(void) | |
217 | +{ | |
218 | + int idx; | |
219 | + struct task_struct *t = current; | |
220 | + int nesting; | |
221 | + | |
222 | + nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | |
223 | + if (nesting != 0) { | |
224 | + | |
225 | + /* An earlier rcu_read_lock() covers us, just count it. */ | |
226 | + | |
227 | + t->rcu_read_lock_nesting = nesting + 1; | |
228 | + | |
229 | + } else { | |
230 | + unsigned long flags; | |
231 | + | |
232 | + /* | |
233 | + * We disable interrupts for the following reasons: | |
234 | + * - If we get scheduling clock interrupt here, and we | |
235 | + * end up acking the counter flip, it's like a promise | |
236 | + * that we will never increment the old counter again. | |
237 | + * Thus we will break that promise if that | |
238 | + * scheduling clock interrupt happens between the time | |
239 | + * we pick the .completed field and the time that we | |
240 | + * increment our counter. | |
241 | + * | |
242 | + * - We don't want to be preempted out here. | |
243 | + * | |
244 | + * NMIs can still occur, of course, and might themselves | |
245 | + * contain rcu_read_lock(). | |
246 | + */ | |
247 | + | |
248 | + local_irq_save(flags); | |
249 | + | |
250 | + /* | |
251 | + * Outermost nesting of rcu_read_lock(), so increment | |
252 | + * the current counter for the current CPU. Use volatile | |
253 | + * casts to prevent the compiler from reordering. | |
254 | + */ | |
255 | + | |
256 | + idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1; | |
257 | + ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++; | |
258 | + | |
259 | + /* | |
260 | + * Now that the per-CPU counter has been incremented, we | |
261 | + * are protected from races with rcu_read_lock() invoked | |
262 | + * from NMI handlers on this CPU. We can therefore safely | |
263 | + * increment the nesting counter, relieving further NMIs | |
264 | + * of the need to increment the per-CPU counter. | |
265 | + */ | |
266 | + | |
267 | + ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1; | |
268 | + | |
269 | + /* | |
270 | + * Now that we have prevented any NMIs from storing | |
271 | + * to the ->rcu_flipctr_idx, we can safely use it to | |
272 | + * remember which counter to decrement in the matching | |
273 | + * rcu_read_unlock(). | |
274 | + */ | |
275 | + | |
276 | + ACCESS_ONCE(t->rcu_flipctr_idx) = idx; | |
277 | + local_irq_restore(flags); | |
278 | + } | |
279 | +} | |
280 | +EXPORT_SYMBOL_GPL(__rcu_read_lock); | |
281 | + | |
282 | +void __rcu_read_unlock(void) | |
283 | +{ | |
284 | + int idx; | |
285 | + struct task_struct *t = current; | |
286 | + int nesting; | |
287 | + | |
288 | + nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); | |
289 | + if (nesting > 1) { | |
290 | + | |
291 | + /* | |
292 | + * We are still protected by the enclosing rcu_read_lock(), | |
293 | + * so simply decrement the counter. | |
294 | + */ | |
295 | + | |
296 | + t->rcu_read_lock_nesting = nesting - 1; | |
297 | + | |
298 | + } else { | |
299 | + unsigned long flags; | |
300 | + | |
301 | + /* | |
302 | + * Disable local interrupts to prevent the grace-period | |
303 | + * detection state machine from seeing us half-done. | |
304 | + * NMIs can still occur, of course, and might themselves | |
305 | + * contain rcu_read_lock() and rcu_read_unlock(). | |
306 | + */ | |
307 | + | |
308 | + local_irq_save(flags); | |
309 | + | |
310 | + /* | |
311 | + * Outermost nesting of rcu_read_unlock(), so we must | |
312 | + * decrement the current counter for the current CPU. | |
313 | + * This must be done carefully, because NMIs can | |
314 | + * occur at any point in this code, and any rcu_read_lock() | |
315 | + * and rcu_read_unlock() pairs in the NMI handlers | |
316 | + * must interact non-destructively with this code. | |
317 | + * Lots of volatile casts, and -very- careful ordering. | |
318 | + * | |
319 | + * Changes to this code, including this one, must be | |
320 | + * inspected, validated, and tested extremely carefully!!! | |
321 | + */ | |
322 | + | |
323 | + /* | |
324 | + * First, pick up the index. | |
325 | + */ | |
326 | + | |
327 | + idx = ACCESS_ONCE(t->rcu_flipctr_idx); | |
328 | + | |
329 | + /* | |
330 | + * Now that we have fetched the counter index, it is | |
331 | + * safe to decrement the per-task RCU nesting counter. | |
332 | + * After this, any interrupts or NMIs will increment and | |
333 | + * decrement the per-CPU counters. | |
334 | + */ | |
335 | + ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1; | |
336 | + | |
337 | + /* | |
338 | + * It is now safe to decrement this task's nesting count. | |
339 | + * NMIs that occur after this statement will route their | |
340 | + * rcu_read_lock() calls through this "else" clause, and | |
341 | + * will thus start incrementing the per-CPU counter on | |
342 | + * their own. They will also clobber ->rcu_flipctr_idx, | |
343 | + * but that is OK, since we have already fetched it. | |
344 | + */ | |
345 | + | |
346 | + ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--; | |
347 | + local_irq_restore(flags); | |
348 | + } | |
349 | +} | |
350 | +EXPORT_SYMBOL_GPL(__rcu_read_unlock); | |
351 | + | |
352 | +/* | |
353 | + * If a global counter flip has occurred since the last time that we | |
354 | + * advanced callbacks, advance them. Hardware interrupts must be | |
355 | + * disabled when calling this function. | |
356 | + */ | |
357 | +static void __rcu_advance_callbacks(struct rcu_data *rdp) | |
358 | +{ | |
359 | + int cpu; | |
360 | + int i; | |
361 | + int wlc = 0; | |
362 | + | |
363 | + if (rdp->completed != rcu_ctrlblk.completed) { | |
364 | + if (rdp->waitlist[GP_STAGES - 1] != NULL) { | |
365 | + *rdp->donetail = rdp->waitlist[GP_STAGES - 1]; | |
366 | + rdp->donetail = rdp->waittail[GP_STAGES - 1]; | |
367 | + RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp); | |
368 | + } | |
369 | + for (i = GP_STAGES - 2; i >= 0; i--) { | |
370 | + if (rdp->waitlist[i] != NULL) { | |
371 | + rdp->waitlist[i + 1] = rdp->waitlist[i]; | |
372 | + rdp->waittail[i + 1] = rdp->waittail[i]; | |
373 | + wlc++; | |
374 | + } else { | |
375 | + rdp->waitlist[i + 1] = NULL; | |
376 | + rdp->waittail[i + 1] = | |
377 | + &rdp->waitlist[i + 1]; | |
378 | + } | |
379 | + } | |
380 | + if (rdp->nextlist != NULL) { | |
381 | + rdp->waitlist[0] = rdp->nextlist; | |
382 | + rdp->waittail[0] = rdp->nexttail; | |
383 | + wlc++; | |
384 | + rdp->nextlist = NULL; | |
385 | + rdp->nexttail = &rdp->nextlist; | |
386 | + RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp); | |
387 | + } else { | |
388 | + rdp->waitlist[0] = NULL; | |
389 | + rdp->waittail[0] = &rdp->waitlist[0]; | |
390 | + } | |
391 | + rdp->waitlistcount = wlc; | |
392 | + rdp->completed = rcu_ctrlblk.completed; | |
393 | + } | |
394 | + | |
395 | + /* | |
396 | + * Check to see if this CPU needs to report that it has seen | |
397 | + * the most recent counter flip, thereby declaring that all | |
398 | + * subsequent rcu_read_lock() invocations will respect this flip. | |
399 | + */ | |
400 | + | |
401 | + cpu = raw_smp_processor_id(); | |
402 | + if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { | |
403 | + smp_mb(); /* Subsequent counter accesses must see new value */ | |
404 | + per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; | |
405 | + smp_mb(); /* Subsequent RCU read-side critical sections */ | |
406 | + /* seen -after- acknowledgement. */ | |
407 | + } | |
408 | +} | |
409 | + | |
410 | +/* | |
411 | + * Get here when RCU is idle. Decide whether we need to | |
412 | + * move out of idle state, and return non-zero if so. | |
413 | + * "Straightforward" approach for the moment, might later | |
414 | + * use callback-list lengths, grace-period duration, or | |
415 | + * some such to determine when to exit idle state. | |
416 | + * Might also need a pre-idle test that does not acquire | |
417 | + * the lock, but let's get the simple case working first... | |
418 | + */ | |
419 | + | |
420 | +static int | |
421 | +rcu_try_flip_idle(void) | |
422 | +{ | |
423 | + int cpu; | |
424 | + | |
425 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_i1); | |
426 | + if (!rcu_pending(smp_processor_id())) { | |
427 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1); | |
428 | + return 0; | |
429 | + } | |
430 | + | |
431 | + /* | |
432 | + * Do the flip. | |
433 | + */ | |
434 | + | |
435 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_g1); | |
436 | + rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */ | |
437 | + | |
438 | + /* | |
439 | + * Need a memory barrier so that other CPUs see the new | |
440 | + * counter value before they see the subsequent change of all | |
441 | + * the rcu_flip_flag instances to rcu_flipped. | |
442 | + */ | |
443 | + | |
444 | + smp_mb(); /* see above block comment. */ | |
445 | + | |
446 | + /* Now ask each CPU for acknowledgement of the flip. */ | |
447 | + | |
448 | + for_each_possible_cpu(cpu) | |
449 | + per_cpu(rcu_flip_flag, cpu) = rcu_flipped; | |
450 | + | |
451 | + return 1; | |
452 | +} | |
453 | + | |
454 | +/* | |
455 | + * Wait for CPUs to acknowledge the flip. | |
456 | + */ | |
457 | + | |
458 | +static int | |
459 | +rcu_try_flip_waitack(void) | |
460 | +{ | |
461 | + int cpu; | |
462 | + | |
463 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); | |
464 | + for_each_possible_cpu(cpu) | |
465 | + if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { | |
466 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); | |
467 | + return 0; | |
468 | + } | |
469 | + | |
470 | + /* | |
471 | + * Make sure our checks above don't bleed into subsequent | |
472 | + * waiting for the sum of the counters to reach zero. | |
473 | + */ | |
474 | + | |
475 | + smp_mb(); /* see above block comment. */ | |
476 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_a2); | |
477 | + return 1; | |
478 | +} | |
479 | + | |
480 | +/* | |
481 | + * Wait for collective ``last'' counter to reach zero, | |
482 | + * then tell all CPUs to do an end-of-grace-period memory barrier. | |
483 | + */ | |
484 | + | |
485 | +static int | |
486 | +rcu_try_flip_waitzero(void) | |
487 | +{ | |
488 | + int cpu; | |
489 | + int lastidx = !(rcu_ctrlblk.completed & 0x1); | |
490 | + int sum = 0; | |
491 | + | |
492 | + /* Check to see if the sum of the "last" counters is zero. */ | |
493 | + | |
494 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); | |
495 | + for_each_possible_cpu(cpu) | |
496 | + sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; | |
497 | + if (sum != 0) { | |
498 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); | |
499 | + return 0; | |
500 | + } | |
501 | + | |
502 | + /* | |
503 | + * This ensures that the other CPUs see the call for | |
504 | + * memory barriers -after- the sum to zero has been | |
505 | + * detected here | |
506 | + */ | |
507 | + smp_mb(); /* ^^^^^^^^^^^^ */ | |
508 | + | |
509 | + /* Call for a memory barrier from each CPU. */ | |
510 | + for_each_possible_cpu(cpu) | |
511 | + per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; | |
512 | + | |
513 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); | |
514 | + return 1; | |
515 | +} | |
516 | + | |
517 | +/* | |
518 | + * Wait for all CPUs to do their end-of-grace-period memory barrier. | |
519 | + * Return 1 once all CPUs have done so. | |
520 | + */ | |
521 | + | |
522 | +static int | |
523 | +rcu_try_flip_waitmb(void) | |
524 | +{ | |
525 | + int cpu; | |
526 | + | |
527 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); | |
528 | + for_each_possible_cpu(cpu) | |
529 | + if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { | |
530 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); | |
531 | + return 0; | |
532 | + } | |
533 | + | |
534 | + smp_mb(); /* Ensure that the above checks precede any following flip. */ | |
535 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_m2); | |
536 | + return 1; | |
537 | +} | |
538 | + | |
539 | +/* | |
540 | + * Attempt a single flip of the counters. Remember, a single flip does | |
541 | + * -not- constitute a grace period. Instead, the interval between | |
542 | + * at least GP_STAGES consecutive flips is a grace period. | |
543 | + * | |
544 | + * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation | |
545 | + * on a large SMP, they might want to use a hierarchical organization of | |
546 | + * the per-CPU-counter pairs. | |
547 | + */ | |
548 | +static void rcu_try_flip(void) | |
549 | +{ | |
550 | + unsigned long flags; | |
551 | + | |
552 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_1); | |
553 | + if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { | |
554 | + RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); | |
555 | + return; | |
556 | + } | |
557 | + | |
558 | + /* | |
559 | + * Take the next transition(s) through the RCU grace-period | |
560 | + * flip-counter state machine. | |
561 | + */ | |
562 | + | |
563 | + switch (rcu_ctrlblk.rcu_try_flip_state) { | |
564 | + case rcu_try_flip_idle_state: | |
565 | + if (rcu_try_flip_idle()) | |
566 | + rcu_ctrlblk.rcu_try_flip_state = | |
567 | + rcu_try_flip_waitack_state; | |
568 | + break; | |
569 | + case rcu_try_flip_waitack_state: | |
570 | + if (rcu_try_flip_waitack()) | |
571 | + rcu_ctrlblk.rcu_try_flip_state = | |
572 | + rcu_try_flip_waitzero_state; | |
573 | + break; | |
574 | + case rcu_try_flip_waitzero_state: | |
575 | + if (rcu_try_flip_waitzero()) | |
576 | + rcu_ctrlblk.rcu_try_flip_state = | |
577 | + rcu_try_flip_waitmb_state; | |
578 | + break; | |
579 | + case rcu_try_flip_waitmb_state: | |
580 | + if (rcu_try_flip_waitmb()) | |
581 | + rcu_ctrlblk.rcu_try_flip_state = | |
582 | + rcu_try_flip_idle_state; | |
583 | + } | |
584 | + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | |
585 | +} | |
586 | + | |
587 | +/* | |
588 | + * Check to see if this CPU needs to do a memory barrier in order to | |
589 | + * ensure that any prior RCU read-side critical sections have committed | |
590 | + * their counter manipulations and critical-section memory references | |
591 | + * before declaring the grace period to be completed. | |
592 | + */ | |
593 | +static void rcu_check_mb(int cpu) | |
594 | +{ | |
595 | + if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) { | |
596 | + smp_mb(); /* Ensure RCU read-side accesses are visible. */ | |
597 | + per_cpu(rcu_mb_flag, cpu) = rcu_mb_done; | |
598 | + } | |
599 | +} | |
600 | + | |
601 | +void rcu_check_callbacks(int cpu, int user) | |
602 | +{ | |
603 | + unsigned long flags; | |
604 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
605 | + | |
606 | + rcu_check_mb(cpu); | |
607 | + if (rcu_ctrlblk.completed == rdp->completed) | |
608 | + rcu_try_flip(); | |
609 | + spin_lock_irqsave(&rdp->lock, flags); | |
610 | + RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | |
611 | + __rcu_advance_callbacks(rdp); | |
612 | + if (rdp->donelist == NULL) { | |
613 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
614 | + } else { | |
615 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
616 | + raise_softirq(RCU_SOFTIRQ); | |
617 | + } | |
618 | +} | |
619 | + | |
620 | +/* | |
621 | + * Needed by dynticks, to make sure all RCU processing has finished | |
622 | + * when we go idle: | |
623 | + */ | |
624 | +void rcu_advance_callbacks(int cpu, int user) | |
625 | +{ | |
626 | + unsigned long flags; | |
627 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
628 | + | |
629 | + if (rcu_ctrlblk.completed == rdp->completed) { | |
630 | + rcu_try_flip(); | |
631 | + if (rcu_ctrlblk.completed == rdp->completed) | |
632 | + return; | |
633 | + } | |
634 | + spin_lock_irqsave(&rdp->lock, flags); | |
635 | + RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); | |
636 | + __rcu_advance_callbacks(rdp); | |
637 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
638 | +} | |
639 | + | |
640 | +static void rcu_process_callbacks(struct softirq_action *unused) | |
641 | +{ | |
642 | + unsigned long flags; | |
643 | + struct rcu_head *next, *list; | |
644 | + struct rcu_data *rdp = RCU_DATA_ME(); | |
645 | + | |
646 | + spin_lock_irqsave(&rdp->lock, flags); | |
647 | + list = rdp->donelist; | |
648 | + if (list == NULL) { | |
649 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
650 | + return; | |
651 | + } | |
652 | + rdp->donelist = NULL; | |
653 | + rdp->donetail = &rdp->donelist; | |
654 | + RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); | |
655 | + spin_unlock_irqrestore(&rdp->lock, flags); | |
656 | + while (list) { | |
657 | + next = list->next; | |
658 | + list->func(list); | |
659 | + list = next; | |
660 | + RCU_TRACE_ME(rcupreempt_trace_invoke); | |
661 | + } | |
662 | +} | |
663 | + | |
664 | +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |
665 | +{ | |
666 | + unsigned long flags; | |
667 | + struct rcu_data *rdp; | |
668 | + | |
669 | + head->func = func; | |
670 | + head->next = NULL; | |
671 | + local_irq_save(flags); | |
672 | + rdp = RCU_DATA_ME(); | |
673 | + spin_lock(&rdp->lock); | |
674 | + __rcu_advance_callbacks(rdp); | |
675 | + *rdp->nexttail = head; | |
676 | + rdp->nexttail = &head->next; | |
677 | + RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); | |
678 | + spin_unlock(&rdp->lock); | |
679 | + local_irq_restore(flags); | |
680 | +} | |
681 | +EXPORT_SYMBOL_GPL(call_rcu); | |
682 | + | |
683 | +/* | |
684 | + * Wait until all currently running preempt_disable() code segments | |
685 | + * (including hardware-irq-disable segments) complete. Note that | |
686 | + * in -rt this does -not- necessarily result in all currently executing | |
687 | + * interrupt -handlers- having completed. | |
688 | + */ | |
689 | +void __synchronize_sched(void) | |
690 | +{ | |
691 | + cpumask_t oldmask; | |
692 | + int cpu; | |
693 | + | |
694 | + if (sched_getaffinity(0, &oldmask) < 0) | |
695 | + oldmask = cpu_possible_map; | |
696 | + for_each_online_cpu(cpu) { | |
697 | + sched_setaffinity(0, cpumask_of_cpu(cpu)); | |
698 | + schedule(); | |
699 | + } | |
700 | + sched_setaffinity(0, oldmask); | |
701 | +} | |
702 | +EXPORT_SYMBOL_GPL(__synchronize_sched); | |
703 | + | |
704 | +/* | |
705 | + * Check to see if any future RCU-related work will need to be done | |
706 | + * by the current CPU, even if none need be done immediately, returning | |
707 | + * 1 if so. Assumes that notifiers would take care of handling any | |
708 | + * outstanding requests from the RCU core. | |
709 | + * | |
710 | + * This function is part of the RCU implementation; it is -not- | |
711 | + * an exported member of the RCU API. | |
712 | + */ | |
713 | +int rcu_needs_cpu(int cpu) | |
714 | +{ | |
715 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
716 | + | |
717 | + return (rdp->donelist != NULL || | |
718 | + !!rdp->waitlistcount || | |
719 | + rdp->nextlist != NULL); | |
720 | +} | |
721 | + | |
722 | +int rcu_pending(int cpu) | |
723 | +{ | |
724 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
725 | + | |
726 | + /* The CPU has at least one callback queued somewhere. */ | |
727 | + | |
728 | + if (rdp->donelist != NULL || | |
729 | + !!rdp->waitlistcount || | |
730 | + rdp->nextlist != NULL) | |
731 | + return 1; | |
732 | + | |
733 | + /* The RCU core needs an acknowledgement from this CPU. */ | |
734 | + | |
735 | + if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) || | |
736 | + (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed)) | |
737 | + return 1; | |
738 | + | |
739 | + /* This CPU has fallen behind the global grace-period number. */ | |
740 | + | |
741 | + if (rdp->completed != rcu_ctrlblk.completed) | |
742 | + return 1; | |
743 | + | |
744 | + /* Nothing needed from this CPU. */ | |
745 | + | |
746 | + return 0; | |
747 | +} | |
748 | + | |
749 | +void __init __rcu_init(void) | |
750 | +{ | |
751 | + int cpu; | |
752 | + int i; | |
753 | + struct rcu_data *rdp; | |
754 | + | |
755 | + printk(KERN_NOTICE "Preemptible RCU implementation.\n"); | |
756 | + for_each_possible_cpu(cpu) { | |
757 | + rdp = RCU_DATA_CPU(cpu); | |
758 | + spin_lock_init(&rdp->lock); | |
759 | + rdp->completed = 0; | |
760 | + rdp->waitlistcount = 0; | |
761 | + rdp->nextlist = NULL; | |
762 | + rdp->nexttail = &rdp->nextlist; | |
763 | + for (i = 0; i < GP_STAGES; i++) { | |
764 | + rdp->waitlist[i] = NULL; | |
765 | + rdp->waittail[i] = &rdp->waitlist[i]; | |
766 | + } | |
767 | + rdp->donelist = NULL; | |
768 | + rdp->donetail = &rdp->donelist; | |
769 | + rdp->rcu_flipctr[0] = 0; | |
770 | + rdp->rcu_flipctr[1] = 0; | |
771 | + } | |
772 | + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); | |
773 | +} | |
774 | + | |
775 | +/* | |
776 | + * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | |
777 | + */ | |
778 | +void synchronize_kernel(void) | |
779 | +{ | |
780 | + synchronize_rcu(); | |
781 | +} | |
782 | + | |
783 | +#ifdef CONFIG_RCU_TRACE | |
784 | +long *rcupreempt_flipctr(int cpu) | |
785 | +{ | |
786 | + return &RCU_DATA_CPU(cpu)->rcu_flipctr[0]; | |
787 | +} | |
788 | +EXPORT_SYMBOL_GPL(rcupreempt_flipctr); | |
789 | + | |
790 | +int rcupreempt_flip_flag(int cpu) | |
791 | +{ | |
792 | + return per_cpu(rcu_flip_flag, cpu); | |
793 | +} | |
794 | +EXPORT_SYMBOL_GPL(rcupreempt_flip_flag); | |
795 | + | |
796 | +int rcupreempt_mb_flag(int cpu) | |
797 | +{ | |
798 | + return per_cpu(rcu_mb_flag, cpu); | |
799 | +} | |
800 | +EXPORT_SYMBOL_GPL(rcupreempt_mb_flag); | |
801 | + | |
802 | +char *rcupreempt_try_flip_state_name(void) | |
803 | +{ | |
804 | + return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state]; | |
805 | +} | |
806 | +EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name); | |
807 | + | |
808 | +struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu) | |
809 | +{ | |
810 | + struct rcu_data *rdp = RCU_DATA_CPU(cpu); | |
811 | + | |
812 | + return &rdp->trace; | |
813 | +} | |
814 | +EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu); | |
815 | + | |
816 | +#endif /* #ifdef CONFIG_RCU_TRACE */ |
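For reference, rcu_try_flip() above only ever walks its states in one order, and a callback queued by call_rcu() is invoked only after riding the per-CPU lists (nextlist, then waitlist[0..GP_STAGES-1], then donelist) as rcu_ctrlblk.completed advances. A compact, self-contained model of the state progression (model_* names are invented):

/* Illustrative model of rcu_try_flip()'s state order:
 * idle -> waitack -> waitzero -> waitmb -> idle.
 * Each step advances only when the corresponding rcu_try_flip_*()
 * helper above returns nonzero; otherwise the state is retried later. */
enum model_state { MODEL_IDLE, MODEL_WAITACK, MODEL_WAITZERO, MODEL_WAITMB };

static enum model_state model_advance(enum model_state s, int step_done)
{
	if (!step_done)
		return s;				/* stay put, retry on a later pass */
	switch (s) {
	case MODEL_IDLE:	return MODEL_WAITACK;	/* counter flipped, wait for acks */
	case MODEL_WAITACK:	return MODEL_WAITZERO;	/* all CPUs acked, wait for old sum == 0 */
	case MODEL_WAITZERO:	return MODEL_WAITMB;	/* old readers gone, wait for mb()s */
	case MODEL_WAITMB:	return MODEL_IDLE;	/* barriers done, one flip stage complete */
	}
	return s;
}

Keep in mind that a single pass through these states is only one counter flip; a full grace period spans at least GP_STAGES consecutive flips.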
kernel/rcupreempt_trace.c
1 | +/* | |
2 | + * Read-Copy Update tracing for realtime implementation | |
3 | + * | |
4 | + * This program is free software; you can redistribute it and/or modify | |
5 | + * it under the terms of the GNU General Public License as published by | |
6 | + * the Free Software Foundation; either version 2 of the License, or | |
7 | + * (at your option) any later version. | |
8 | + * | |
9 | + * This program is distributed in the hope that it will be useful, | |
10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | + * GNU General Public License for more details. | |
13 | + * | |
14 | + * You should have received a copy of the GNU General Public License | |
15 | + * along with this program; if not, write to the Free Software | |
16 | + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | + * | |
18 | + * Copyright IBM Corporation, 2006 | |
19 | + * | |
20 | + * Papers: http://www.rdrop.com/users/paulmck/RCU | |
21 | + * | |
22 | + * For detailed explanation of Read-Copy Update mechanism see - | |
23 | + * Documentation/RCU/ *.txt | |
24 | + * | |
25 | + */ | |
26 | +#include <linux/types.h> | |
27 | +#include <linux/kernel.h> | |
28 | +#include <linux/init.h> | |
29 | +#include <linux/spinlock.h> | |
30 | +#include <linux/smp.h> | |
31 | +#include <linux/rcupdate.h> | |
32 | +#include <linux/interrupt.h> | |
33 | +#include <linux/sched.h> | |
34 | +#include <asm/atomic.h> | |
35 | +#include <linux/bitops.h> | |
36 | +#include <linux/module.h> | |
37 | +#include <linux/completion.h> | |
38 | +#include <linux/moduleparam.h> | |
39 | +#include <linux/percpu.h> | |
40 | +#include <linux/notifier.h> | |
41 | +#include <linux/rcupdate.h> | |
42 | +#include <linux/cpu.h> | |
43 | +#include <linux/mutex.h> | |
44 | +#include <linux/rcupreempt_trace.h> | |
45 | +#include <linux/debugfs.h> | |
46 | + | |
47 | +static struct mutex rcupreempt_trace_mutex; | |
48 | +static char *rcupreempt_trace_buf; | |
49 | +#define RCUPREEMPT_TRACE_BUF_SIZE 4096 | |
50 | + | |
51 | +void rcupreempt_trace_move2done(struct rcupreempt_trace *trace) | |
52 | +{ | |
53 | + trace->done_length += trace->wait_length; | |
54 | + trace->done_add += trace->wait_length; | |
55 | + trace->wait_length = 0; | |
56 | +} | |
57 | +void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace) | |
58 | +{ | |
59 | + trace->wait_length += trace->next_length; | |
60 | + trace->wait_add += trace->next_length; | |
61 | + trace->next_length = 0; | |
62 | +} | |
63 | +void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace) | |
64 | +{ | |
65 | + atomic_inc(&trace->rcu_try_flip_1); | |
66 | +} | |
67 | +void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace) | |
68 | +{ | |
69 | + atomic_inc(&trace->rcu_try_flip_e1); | |
70 | +} | |
71 | +void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace) | |
72 | +{ | |
73 | + trace->rcu_try_flip_i1++; | |
74 | +} | |
75 | +void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace) | |
76 | +{ | |
77 | + trace->rcu_try_flip_ie1++; | |
78 | +} | |
79 | +void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace) | |
80 | +{ | |
81 | + trace->rcu_try_flip_g1++; | |
82 | +} | |
83 | +void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace) | |
84 | +{ | |
85 | + trace->rcu_try_flip_a1++; | |
86 | +} | |
87 | +void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace) | |
88 | +{ | |
89 | + trace->rcu_try_flip_ae1++; | |
90 | +} | |
91 | +void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace) | |
92 | +{ | |
93 | + trace->rcu_try_flip_a2++; | |
94 | +} | |
95 | +void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace) | |
96 | +{ | |
97 | + trace->rcu_try_flip_z1++; | |
98 | +} | |
99 | +void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace) | |
100 | +{ | |
101 | + trace->rcu_try_flip_ze1++; | |
102 | +} | |
103 | +void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace) | |
104 | +{ | |
105 | + trace->rcu_try_flip_z2++; | |
106 | +} | |
107 | +void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace) | |
108 | +{ | |
109 | + trace->rcu_try_flip_m1++; | |
110 | +} | |
111 | +void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace) | |
112 | +{ | |
113 | + trace->rcu_try_flip_me1++; | |
114 | +} | |
115 | +void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace) | |
116 | +{ | |
117 | + trace->rcu_try_flip_m2++; | |
118 | +} | |
119 | +void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace) | |
120 | +{ | |
121 | + trace->rcu_check_callbacks++; | |
122 | +} | |
123 | +void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace) | |
124 | +{ | |
125 | + trace->done_remove += trace->done_length; | |
126 | + trace->done_length = 0; | |
127 | +} | |
128 | +void rcupreempt_trace_invoke(struct rcupreempt_trace *trace) | |
129 | +{ | |
130 | + atomic_inc(&trace->done_invoked); | |
131 | +} | |
132 | +void rcupreempt_trace_next_add(struct rcupreempt_trace *trace) | |
133 | +{ | |
134 | + trace->next_add++; | |
135 | + trace->next_length++; | |
136 | +} | |
137 | + | |
138 | +static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) | |
139 | +{ | |
140 | + struct rcupreempt_trace *cp; | |
141 | + int cpu; | |
142 | + | |
143 | + memset(sp, 0, sizeof(*sp)); | |
144 | + for_each_possible_cpu(cpu) { | |
145 | + cp = rcupreempt_trace_cpu(cpu); | |
146 | + sp->next_length += cp->next_length; | |
147 | + sp->next_add += cp->next_add; | |
148 | + sp->wait_length += cp->wait_length; | |
149 | + sp->wait_add += cp->wait_add; | |
150 | + sp->done_length += cp->done_length; | |
151 | + sp->done_add += cp->done_add; | |
152 | + sp->done_remove += cp->done_remove; | |
153 | + atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); | |
154 | + sp->rcu_check_callbacks += cp->rcu_check_callbacks; | |
155 | + atomic_set(&sp->rcu_try_flip_1, | |
156 | + atomic_read(&cp->rcu_try_flip_1)); | |
157 | + atomic_set(&sp->rcu_try_flip_e1, | |
158 | + atomic_read(&cp->rcu_try_flip_e1)); | |
159 | + sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; | |
160 | + sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; | |
161 | + sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; | |
162 | + sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1; | |
163 | + sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1; | |
164 | + sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2; | |
165 | + sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1; | |
166 | + sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1; | |
167 | + sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2; | |
168 | + sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1; | |
169 | + sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1; | |
170 | + sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2; | |
171 | + } | |
172 | +} | |
173 | + | |
174 | +static ssize_t rcustats_read(struct file *filp, char __user *buffer, | |
175 | + size_t count, loff_t *ppos) | |
176 | +{ | |
177 | + struct rcupreempt_trace trace; | |
178 | + ssize_t bcount; | |
179 | + int cnt = 0; | |
180 | + | |
181 | + rcupreempt_trace_sum(&trace); | |
182 | + mutex_lock(&rcupreempt_trace_mutex); | |
183 | + snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | |
184 | + "ggp=%ld rcc=%ld\n", | |
185 | + rcu_batches_completed(), | |
186 | + trace.rcu_check_callbacks); | |
187 | + snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, | |
188 | + "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n" | |
189 | + "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n" | |
190 | + "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n", | |
191 | + | |
192 | + trace.next_add, trace.next_length, | |
193 | + trace.wait_add, trace.wait_length, | |
194 | + trace.done_add, trace.done_length, | |
195 | + trace.done_remove, atomic_read(&trace.done_invoked), | |
196 | + atomic_read(&trace.rcu_try_flip_1), | |
197 | + atomic_read(&trace.rcu_try_flip_e1), | |
198 | + trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1, | |
199 | + trace.rcu_try_flip_g1, | |
200 | + trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1, | |
201 | + trace.rcu_try_flip_a2, | |
202 | + trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1, | |
203 | + trace.rcu_try_flip_z2, | |
204 | + trace.rcu_try_flip_m1, trace.rcu_try_flip_me1, | |
205 | + trace.rcu_try_flip_m2); | |
206 | + bcount = simple_read_from_buffer(buffer, count, ppos, | |
207 | + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | |
208 | + mutex_unlock(&rcupreempt_trace_mutex); | |
209 | + return bcount; | |
210 | +} | |
211 | + | |
212 | +static ssize_t rcugp_read(struct file *filp, char __user *buffer, | |
213 | + size_t count, loff_t *ppos) | |
214 | +{ | |
215 | + long oldgp = rcu_batches_completed(); | |
216 | + ssize_t bcount; | |
217 | + | |
218 | + mutex_lock(&rcupreempt_trace_mutex); | |
219 | + synchronize_rcu(); | |
220 | + snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE, | |
221 | + "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed()); | |
222 | + bcount = simple_read_from_buffer(buffer, count, ppos, | |
223 | + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | |
224 | + mutex_unlock(&rcupreempt_trace_mutex); | |
225 | + return bcount; | |
226 | +} | |
227 | + | |
228 | +static ssize_t rcuctrs_read(struct file *filp, char __user *buffer, | |
229 | + size_t count, loff_t *ppos) | |
230 | +{ | |
231 | + int cnt = 0; | |
232 | + int cpu; | |
233 | + int f = rcu_batches_completed() & 0x1; | |
234 | + ssize_t bcount; | |
235 | + | |
236 | + mutex_lock(&rcupreempt_trace_mutex); | |
237 | + | |
238 | + cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE, | |
239 | + "CPU last cur F M\n"); | |
240 | + for_each_online_cpu(cpu) { | |
241 | + long *flipctr = rcupreempt_flipctr(cpu); | |
242 | + cnt += snprintf(&rcupreempt_trace_buf[cnt], | |
243 | + RCUPREEMPT_TRACE_BUF_SIZE - cnt, | |
244 | + "%3d %4ld %3ld %d %d\n", | |
245 | + cpu, | |
246 | + flipctr[!f], | |
247 | + flipctr[f], | |
248 | + rcupreempt_flip_flag(cpu), | |
249 | + rcupreempt_mb_flag(cpu)); | |
250 | + } | |
251 | + cnt += snprintf(&rcupreempt_trace_buf[cnt], | |
252 | + RCUPREEMPT_TRACE_BUF_SIZE - cnt, | |
253 | + "ggp = %ld, state = %s\n", | |
254 | + rcu_batches_completed(), | |
255 | + rcupreempt_try_flip_state_name()); | |
256 | + cnt += snprintf(&rcupreempt_trace_buf[cnt], | |
257 | + RCUPREEMPT_TRACE_BUF_SIZE - cnt, | |
258 | + "\n"); | |
259 | + bcount = simple_read_from_buffer(buffer, count, ppos, | |
260 | + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); | |
261 | + mutex_unlock(&rcupreempt_trace_mutex); | |
262 | + return bcount; | |
263 | +} | |
264 | + | |
265 | +static struct file_operations rcustats_fops = { | |
266 | + .owner = THIS_MODULE, | |
267 | + .read = rcustats_read, | |
268 | +}; | |
269 | + | |
270 | +static struct file_operations rcugp_fops = { | |
271 | + .owner = THIS_MODULE, | |
272 | + .read = rcugp_read, | |
273 | +}; | |
274 | + | |
275 | +static struct file_operations rcuctrs_fops = { | |
276 | + .owner = THIS_MODULE, | |
277 | + .read = rcuctrs_read, | |
278 | +}; | |
279 | + | |
280 | +static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir; | |
281 | +static int rcupreempt_debugfs_init(void) | |
282 | +{ | |
283 | + rcudir = debugfs_create_dir("rcu", NULL); | |
284 | + if (!rcudir) | |
285 | + goto out; | |
286 | + statdir = debugfs_create_file("rcustats", 0444, rcudir, | |
287 | + NULL, &rcustats_fops); | |
288 | + if (!statdir) | |
289 | + goto free_out; | |
290 | + | |
291 | + gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); | |
292 | + if (!gpdir) | |
293 | + goto free_out; | |
294 | + | |
295 | + ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir, | |
296 | + NULL, &rcuctrs_fops); | |
297 | + if (!ctrsdir) | |
298 | + goto free_out; | |
299 | + return 0; | |
300 | +free_out: | |
301 | + if (statdir) | |
302 | + debugfs_remove(statdir); | |
303 | + if (gpdir) | |
304 | + debugfs_remove(gpdir); | |
305 | + debugfs_remove(rcudir); | |
306 | +out: | |
307 | + return 1; | |
308 | +} | |
309 | + | |
310 | +static int __init rcupreempt_trace_init(void) | |
311 | +{ | |
312 | + mutex_init(&rcupreempt_trace_mutex); | |
313 | + rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); | |
314 | + if (!rcupreempt_trace_buf) | |
315 | + return 1; | |
316 | + return rcupreempt_debugfs_init(); | |
317 | +} | |
318 | + | |
319 | +static void __exit rcupreempt_trace_cleanup(void) | |
320 | +{ | |
321 | + debugfs_remove(statdir); | |
322 | + debugfs_remove(gpdir); | |
323 | + debugfs_remove(ctrsdir); | |
324 | + debugfs_remove(rcudir); | |
325 | + kfree(rcupreempt_trace_buf); | |
326 | +} | |
327 | + | |
328 | + | |
329 | +module_init(rcupreempt_trace_init); | |
330 | +module_exit(rcupreempt_trace_cleanup); |
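The three files created above appear under the "rcu" debugfs directory, so with debugfs mounted at /sys/kernel/debug (an assumption; the mount point is up to the administrator) they can simply be read from userspace. A small, hypothetical userspace dumper:

#include <stdio.h>

/* Illustration only: dump the preemptible-RCU debugfs files.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
int main(void)
{
	const char *files[] = {
		"/sys/kernel/debug/rcu/rcustats",
		"/sys/kernel/debug/rcu/rcugp",
		"/sys/kernel/debug/rcu/rcuctrs",
	};
	char buf[4096];
	size_t n;
	unsigned int i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f = fopen(files[i], "r");

		if (!f) {
			perror(files[i]);
			continue;
		}
		printf("==> %s <==\n", files[i]);
		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);
		fclose(f);
	}
	return 0;
}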