Commit e260be673a15b6125068270e0216a3bfbfc12f87

Authored by Paul E. McKenney
Committed by Ingo Molnar
1 parent e0ecfa7917

Preempt-RCU: implementation

This patch implements a new version of RCU which allows its read-side
critical sections to be preempted. It uses a set of counter pairs
to keep track of the read-side critical sections and flips them
when all tasks have exited their read-side critical sections. The details
of this implementation can be found in this paper -

	http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf

and this article -

	http://lwn.net/Articles/253651/
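
As a rough illustration of the counter-pair idea, here is a toy,
single-threaded model (sketch only -- it leaves out the per-CPU counters,
memory barriers, the acknowledgement and memory-barrier handshakes, and
the multi-stage grace periods of the real implementation below; all names
here are made up for the sketch):

#include <stdio.h>

/* Toy model: one counter pair instead of one pair per CPU. */
static long flipctr[2];		/* models rcu_flipctr[]             */
static long completed;		/* models rcu_ctrlblk.completed     */
static int waiting_for_drain;	/* models the "waitzero" GP state   */
static int old_idx;		/* counter readers used before flip */

struct toy_reader { int idx; };

static void toy_read_lock(struct toy_reader *r)
{
	r->idx = completed & 0x1;	/* "current" half of the pair */
	flipctr[r->idx]++;
}

static void toy_read_unlock(struct toy_reader *r)
{
	flipctr[r->idx]--;	/* same half, even if a flip happened */
}

/* One step of the grace-period detector: flip, then wait for drain. */
static int toy_gp_step(void)
{
	if (!waiting_for_drain) {
		old_idx = completed & 0x1;
		completed++;		/* new readers use the other half */
		waiting_for_drain = 1;
		return 0;
	}
	if (flipctr[old_idx] != 0)
		return 0;		/* pre-flip readers still inside */
	waiting_for_drain = 0;		/* all pre-flip readers are done */
	return 1;
}

int main(void)
{
	struct toy_reader a;

	toy_read_lock(&a);
	toy_gp_step();				/* flips the counters */
	printf("drained? %d\n", toy_gp_step());	/* 0: 'a' still reads */
	toy_read_unlock(&a);
	printf("drained? %d\n", toy_gp_step());	/* 1: old half is zero */
	return 0;
}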

This patch was developed as part of the -rt kernel development and is
meant to provide better latencies by allowing RCU read-side critical
sections to be preempted instead of disabling preemption.  As a
consequence of keeping track of RCU readers, the readers incur a slight
overhead (optimizations are discussed in the paper).  This implementation
co-exists with the "classic" RCU implementation and can be switched to
at compile time.

Also includes RCU tracing summarized in debugfs.

[ akpm@linux-foundation.org: build fixes on non-preempt architectures ]

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Showing 12 changed files with 1394 additions and 7 deletions

include/linux/rcuclassic.h
... ... @@ -157,6 +157,9 @@
157 157 extern void rcu_check_callbacks(int cpu, int user);
158 158 extern void rcu_restart_cpu(int cpu);
159 159  
  160 +extern long rcu_batches_completed(void);
  161 +extern long rcu_batches_completed_bh(void);
  162 +
160 163 #endif /* __KERNEL__ */
161 164 #endif /* __LINUX_RCUCLASSIC_H */
include/linux/rcupdate.h
... ... @@ -53,7 +53,11 @@
53 53 void (*func)(struct rcu_head *head);
54 54 };
55 55  
  56 +#ifdef CONFIG_CLASSIC_RCU
56 57 #include <linux/rcuclassic.h>
  58 +#else /* #ifdef CONFIG_CLASSIC_RCU */
  59 +#include <linux/rcupreempt.h>
  60 +#endif /* #else #ifdef CONFIG_CLASSIC_RCU */
57 61  
58 62 #define RCU_HEAD_INIT { .next = NULL, .func = NULL }
59 63 #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
60 64  
... ... @@ -231,13 +235,12 @@
231 235 /* Exported common interfaces */
232 236 extern void synchronize_rcu(void);
233 237 extern void rcu_barrier(void);
  238 +extern long rcu_batches_completed(void);
  239 +extern long rcu_batches_completed_bh(void);
234 240  
235 241 /* Internal to kernel */
236 242 extern void rcu_init(void);
237   -extern void rcu_check_callbacks(int cpu, int user);
238   -
239   -extern long rcu_batches_completed(void);
240   -extern long rcu_batches_completed_bh(void);
  243 +extern int rcu_needs_cpu(int cpu);
241 244  
242 245 #endif /* __KERNEL__ */
243 246 #endif /* __LINUX_RCUPDATE_H */
include/linux/rcupreempt.h
  1 +/*
  2 + * Read-Copy Update mechanism for mutual exclusion (RT implementation)
  3 + *
  4 + * This program is free software; you can redistribute it and/or modify
  5 + * it under the terms of the GNU General Public License as published by
  6 + * the Free Software Foundation; either version 2 of the License, or
  7 + * (at your option) any later version.
  8 + *
  9 + * This program is distributed in the hope that it will be useful,
  10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12 + * GNU General Public License for more details.
  13 + *
  14 + * You should have received a copy of the GNU General Public License
  15 + * along with this program; if not, write to the Free Software
  16 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 + *
  18 + * Copyright (C) IBM Corporation, 2006
  19 + *
  20 + * Author: Paul McKenney <paulmck@us.ibm.com>
  21 + *
  22 + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
  23 + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  24 + * Papers:
  25 + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
  26 + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
  27 + *
  28 + * For detailed explanation of Read-Copy Update mechanism see -
  29 + * Documentation/RCU
  30 + *
  31 + */
  32 +
  33 +#ifndef __LINUX_RCUPREEMPT_H
  34 +#define __LINUX_RCUPREEMPT_H
  35 +
  36 +#ifdef __KERNEL__
  37 +
  38 +#include <linux/cache.h>
  39 +#include <linux/spinlock.h>
  40 +#include <linux/threads.h>
  41 +#include <linux/percpu.h>
  42 +#include <linux/cpumask.h>
  43 +#include <linux/seqlock.h>
  44 +
  45 +#define rcu_qsctr_inc(cpu)
  46 +#define rcu_bh_qsctr_inc(cpu)
  47 +#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
  48 +
  49 +extern void __rcu_read_lock(void);
  50 +extern void __rcu_read_unlock(void);
  51 +extern int rcu_pending(int cpu);
  52 +extern int rcu_needs_cpu(int cpu);
  53 +
  54 +#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); }
  55 +#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); }
  56 +
  57 +extern void __synchronize_sched(void);
  58 +
  59 +extern void __rcu_init(void);
  60 +extern void rcu_check_callbacks(int cpu, int user);
  61 +extern void rcu_restart_cpu(int cpu);
  62 +extern long rcu_batches_completed(void);
  63 +
  64 +/*
  65 + * Return the number of RCU batches processed thus far. Useful for debug
  66 + * and statistics. The _bh variant is identical to straight RCU.
  67 + */
  68 +static inline long rcu_batches_completed_bh(void)
  69 +{
  70 + return rcu_batches_completed();
  71 +}
  72 +
  73 +#ifdef CONFIG_RCU_TRACE
  74 +struct rcupreempt_trace;
  75 +extern long *rcupreempt_flipctr(int cpu);
  76 +extern long rcupreempt_data_completed(void);
  77 +extern int rcupreempt_flip_flag(int cpu);
  78 +extern int rcupreempt_mb_flag(int cpu);
  79 +extern char *rcupreempt_try_flip_state_name(void);
  80 +extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
  81 +#endif
  82 +
  83 +struct softirq_action;
  84 +
  85 +#endif /* __KERNEL__ */
  86 +#endif /* __LINUX_RCUPREEMPT_H */
include/linux/rcupreempt_trace.h
  1 +/*
  2 + * Read-Copy Update mechanism for mutual exclusion (RT implementation)
  3 + *
  4 + * This program is free software; you can redistribute it and/or modify
  5 + * it under the terms of the GNU General Public License as published by
  6 + * the Free Software Foundation; either version 2 of the License, or
  7 + * (at your option) any later version.
  8 + *
  9 + * This program is distributed in the hope that it will be useful,
  10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12 + * GNU General Public License for more details.
  13 + *
  14 + * You should have received a copy of the GNU General Public License
  15 + * along with this program; if not, write to the Free Software
  16 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 + *
  18 + * Copyright (C) IBM Corporation, 2006
  19 + *
  20 + * Author: Paul McKenney <paulmck@us.ibm.com>
  21 + *
  22 + * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
  23 + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  24 + * Papers:
  25 + * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
  26 + * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
  27 + *
  28 + * For detailed explanation of the Preemptible Read-Copy Update mechanism see -
  29 + * http://lwn.net/Articles/253651/
  30 + */
  31 +
  32 +#ifndef __LINUX_RCUPREEMPT_TRACE_H
  33 +#define __LINUX_RCUPREEMPT_TRACE_H
  34 +
  35 +#ifdef __KERNEL__
  36 +#include <linux/types.h>
  37 +#include <linux/kernel.h>
  38 +
  39 +#include <asm/atomic.h>
  40 +
  41 +/*
  42 + * PREEMPT_RCU data structures.
  43 + */
  44 +
  45 +struct rcupreempt_trace {
  46 + long next_length;
  47 + long next_add;
  48 + long wait_length;
  49 + long wait_add;
  50 + long done_length;
  51 + long done_add;
  52 + long done_remove;
  53 + atomic_t done_invoked;
  54 + long rcu_check_callbacks;
  55 + atomic_t rcu_try_flip_1;
  56 + atomic_t rcu_try_flip_e1;
  57 + long rcu_try_flip_i1;
  58 + long rcu_try_flip_ie1;
  59 + long rcu_try_flip_g1;
  60 + long rcu_try_flip_a1;
  61 + long rcu_try_flip_ae1;
  62 + long rcu_try_flip_a2;
  63 + long rcu_try_flip_z1;
  64 + long rcu_try_flip_ze1;
  65 + long rcu_try_flip_z2;
  66 + long rcu_try_flip_m1;
  67 + long rcu_try_flip_me1;
  68 + long rcu_try_flip_m2;
  69 +};
  70 +
  71 +#ifdef CONFIG_RCU_TRACE
  72 +#define RCU_TRACE(fn, arg) fn(arg);
  73 +#else
  74 +#define RCU_TRACE(fn, arg)
  75 +#endif
  76 +
  77 +extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace);
  78 +extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace);
  79 +extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace);
  80 +extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace);
  81 +extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace);
  82 +extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace);
  83 +extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace);
  84 +extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace);
  85 +extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace);
  86 +extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace);
  87 +extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace);
  88 +extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace);
  89 +extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace);
  90 +extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace);
  91 +extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace);
  92 +extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace);
  93 +extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace);
  94 +extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace);
  95 +extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace);
  96 +extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace);
  97 +
  98 +#endif /* __KERNEL__ */
  99 +#endif /* __LINUX_RCUPREEMPT_TRACE_H */
include/linux/sched.h
... ... @@ -974,6 +974,11 @@
974 974 int nr_cpus_allowed;
975 975 unsigned int time_slice;
976 976  
  977 +#ifdef CONFIG_PREEMPT_RCU
  978 + int rcu_read_lock_nesting;
  979 + int rcu_flipctr_idx;
  980 +#endif /* #ifdef CONFIG_PREEMPT_RCU */
  981 +
977 982 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
978 983 struct sched_info sched_info;
979 984 #endif
init/Kconfig
... ... @@ -763,4 +763,32 @@
763 763  
764 764 config PREEMPT_NOTIFIERS
765 765 bool
  766 +
  767 +choice
  768 + prompt "RCU implementation type:"
  769 + default CLASSIC_RCU
  770 +
  771 +config CLASSIC_RCU
  772 + bool "Classic RCU"
  773 + help
  774 + This option selects the classic RCU implementation that is
  775 + designed for best read-side performance on non-realtime
  776 + systems.
  777 +
  778 + Say Y if you are unsure.
  779 +
  780 +config PREEMPT_RCU
  781 + bool "Preemptible RCU"
  782 + depends on PREEMPT
  783 + help
  784 + This option reduces the latency of the kernel by making certain
  785 + RCU sections preemptible. Normally RCU code is non-preemptible; if
  786 + this option is selected, then read-only RCU sections become
  787 + preemptible. This helps latency, but may expose bugs due to
  788 + now-naive assumptions about each RCU read-side critical section
  789 + remaining on a given CPU through its execution.
  790 +
  791 + Say N if you are unsure.
  792 +
  793 +endchoice
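
For reference, a configuration selecting the new implementation would
carry a fragment along these lines (illustrative .config excerpt, not
part of this patch; CONFIG_PREEMPT must already be set for the choice to
be available):

CONFIG_PREEMPT=y
# CONFIG_CLASSIC_RCU is not set
CONFIG_PREEMPT_RCU=y
CONFIG_RCU_TRACE=y
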
kernel/Kconfig.preempt
... ... @@ -62,4 +62,15 @@
62 62  
63 63 Say Y here if you are building a kernel for a desktop system.
64 64 Say N if you are unsure.
  65 +
  66 +config RCU_TRACE
  67 + bool "Enable tracing for RCU - currently stats in debugfs"
  68 + select DEBUG_FS
  69 + default y
  70 + help
  71 + This option provides tracing in RCU which presents stats
  72 + in debugfs for debugging the RCU implementation.
  73 +
  74 + Say Y here if you want to enable RCU tracing.
  75 + Say N if you are unsure.
kernel/Makefile
... ... @@ -6,7 +6,7 @@
6 6 exit.o itimer.o time.o softirq.o resource.o \
7 7 sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
8 8 signal.o sys.o kmod.o workqueue.o pid.o \
9   - rcupdate.o rcuclassic.o extable.o params.o posix-timers.o \
  9 + rcupdate.o extable.o params.o posix-timers.o \
10 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 11 hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
12 12 utsname.o notifier.o
... ... @@ -52,6 +52,11 @@
52 52 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
53 53 obj-$(CONFIG_SECCOMP) += seccomp.o
54 54 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
  55 +obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
  56 +obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
  57 +ifeq ($(CONFIG_PREEMPT_RCU),y)
  58 +obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
  59 +endif
55 60 obj-$(CONFIG_RELAY) += relay.o
56 61 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
57 62 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
kernel/fork.c
... ... @@ -1045,6 +1045,10 @@
1045 1045 copy_flags(clone_flags, p);
1046 1046 INIT_LIST_HEAD(&p->children);
1047 1047 INIT_LIST_HEAD(&p->sibling);
  1048 +#ifdef CONFIG_PREEMPT_RCU
  1049 + p->rcu_read_lock_nesting = 0;
  1050 + p->rcu_flipctr_idx = 0;
  1051 +#endif /* #ifdef CONFIG_PREEMPT_RCU */
1048 1052 p->vfork_done = NULL;
1049 1053 spin_lock_init(&p->alloc_lock);
1050 1054  
... ... @@ -45,7 +45,6 @@
45 45 #include <linux/moduleparam.h>
46 46 #include <linux/percpu.h>
47 47 #include <linux/notifier.h>
48   -/* #include <linux/rcupdate.h> @@@ */
49 48 #include <linux/cpu.h>
50 49 #include <linux/mutex.h>
51 50  
kernel/rcupreempt.c
  1 +/*
  2 + * Read-Copy Update mechanism for mutual exclusion, realtime implementation
  3 + *
  4 + * This program is free software; you can redistribute it and/or modify
  5 + * it under the terms of the GNU General Public License as published by
  6 + * the Free Software Foundation; either version 2 of the License, or
  7 + * (at your option) any later version.
  8 + *
  9 + * This program is distributed in the hope that it will be useful,
  10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12 + * GNU General Public License for more details.
  13 + *
  14 + * You should have received a copy of the GNU General Public License
  15 + * along with this program; if not, write to the Free Software
  16 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 + *
  18 + * Copyright IBM Corporation, 2006
  19 + *
  20 + * Authors: Paul E. McKenney <paulmck@us.ibm.com>
  21 + * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
  22 + * for pushing me away from locks and towards counters, and
  23 + * to Suparna Bhattacharya for pushing me completely away
  24 + * from atomic instructions on the read side.
  25 + *
  26 + * Papers: http://www.rdrop.com/users/paulmck/RCU
  27 + *
  28 + * Design Document: http://lwn.net/Articles/253651/
  29 + *
  30 + * For detailed explanation of Read-Copy Update mechanism see -
  31 + * Documentation/RCU/ *.txt
  32 + *
  33 + */
  34 +#include <linux/types.h>
  35 +#include <linux/kernel.h>
  36 +#include <linux/init.h>
  37 +#include <linux/spinlock.h>
  38 +#include <linux/smp.h>
  39 +#include <linux/rcupdate.h>
  40 +#include <linux/interrupt.h>
  41 +#include <linux/sched.h>
  42 +#include <asm/atomic.h>
  43 +#include <linux/bitops.h>
  44 +#include <linux/module.h>
  45 +#include <linux/completion.h>
  46 +#include <linux/moduleparam.h>
  47 +#include <linux/percpu.h>
  48 +#include <linux/notifier.h>
  49 +#include <linux/rcupdate.h>
  50 +#include <linux/cpu.h>
  51 +#include <linux/random.h>
  52 +#include <linux/delay.h>
  53 +#include <linux/byteorder/swabb.h>
  54 +#include <linux/cpumask.h>
  55 +#include <linux/rcupreempt_trace.h>
  56 +
  57 +/*
  58 + * Macro that prevents the compiler from reordering accesses, but does
  59 + * absolutely -nothing- to prevent CPUs from reordering. This is used
  60 + * only to mediate communication between mainline code and hardware
  61 + * interrupt and NMI handlers.
  62 + */
  63 +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  64 +
  65 +/*
  66 + * PREEMPT_RCU data structures.
  67 + */
  68 +
  69 +/*
  70 + * GP_STAGES specifies the number of times the state machine has
  71 + * to go through the all the rcu_try_flip_states (see below)
  72 + * in a single Grace Period.
  73 + *
  74 + * GP in GP_STAGES stands for Grace Period ;)
  75 + */
  76 +#define GP_STAGES 2
  77 +struct rcu_data {
  78 + spinlock_t lock; /* Protect rcu_data fields. */
  79 + long completed; /* Number of last completed batch. */
  80 + int waitlistcount;
  81 + struct tasklet_struct rcu_tasklet;
  82 + struct rcu_head *nextlist;
  83 + struct rcu_head **nexttail;
  84 + struct rcu_head *waitlist[GP_STAGES];
  85 + struct rcu_head **waittail[GP_STAGES];
  86 + struct rcu_head *donelist;
  87 + struct rcu_head **donetail;
  88 + long rcu_flipctr[2];
  89 +#ifdef CONFIG_RCU_TRACE
  90 + struct rcupreempt_trace trace;
  91 +#endif /* #ifdef CONFIG_RCU_TRACE */
  92 +};
  93 +
  94 +/*
  95 + * States for rcu_try_flip() and friends.
  96 + */
  97 +
  98 +enum rcu_try_flip_states {
  99 +
  100 + /*
  101 + * Stay here if nothing is happening. Flip the counter if something
  102 + * starts happening. Denoted by "I"
  103 + */
  104 + rcu_try_flip_idle_state,
  105 +
  106 + /*
  107 + * Wait here for all CPUs to notice that the counter has flipped. This
  108 + * prevents the old set of counters from ever being incremented once
  109 + * we leave this state, which in turn is necessary because we cannot
  110 + * test any individual counter for zero -- we can only check the sum.
  111 + * Denoted by "A".
  112 + */
  113 + rcu_try_flip_waitack_state,
  114 +
  115 + /*
  116 + * Wait here for the sum of the old per-CPU counters to reach zero.
  117 + * Denoted by "Z".
  118 + */
  119 + rcu_try_flip_waitzero_state,
  120 +
  121 + /*
  122 + * Wait here for each of the other CPUs to execute a memory barrier.
  123 + * This is necessary to ensure that these other CPUs really have
  124 + * completed executing their RCU read-side critical sections, despite
  125 + * their CPUs wildly reordering memory. Denoted by "M".
  126 + */
  127 + rcu_try_flip_waitmb_state,
  128 +};
  129 +
  130 +struct rcu_ctrlblk {
  131 + spinlock_t fliplock; /* Protect state-machine transitions. */
  132 + long completed; /* Number of last completed batch. */
  133 + enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
  134 + the rcu state machine */
  135 +};
  136 +
  137 +static DEFINE_PER_CPU(struct rcu_data, rcu_data);
  138 +static struct rcu_ctrlblk rcu_ctrlblk = {
  139 + .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
  140 + .completed = 0,
  141 + .rcu_try_flip_state = rcu_try_flip_idle_state,
  142 +};
  143 +
  144 +
  145 +#ifdef CONFIG_RCU_TRACE
  146 +static char *rcu_try_flip_state_names[] =
  147 + { "idle", "waitack", "waitzero", "waitmb" };
  148 +#endif /* #ifdef CONFIG_RCU_TRACE */
  149 +
  150 +/*
  151 + * Enum and per-CPU flag to determine when each CPU has seen
  152 + * the most recent counter flip.
  153 + */
  154 +
  155 +enum rcu_flip_flag_values {
  156 + rcu_flip_seen, /* Steady/initial state, last flip seen. */
  157 + /* Only GP detector can update. */
  158 + rcu_flipped /* Flip just completed, need confirmation. */
  159 + /* Only corresponding CPU can update. */
  160 +};
  161 +static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
  162 + = rcu_flip_seen;
  163 +
  164 +/*
  165 + * Enum and per-CPU flag to determine when each CPU has executed the
  166 + * needed memory barrier to fence in memory references from its last RCU
  167 + * read-side critical section in the just-completed grace period.
  168 + */
  169 +
  170 +enum rcu_mb_flag_values {
  171 + rcu_mb_done, /* Steady/initial state, no mb()s required. */
  172 + /* Only GP detector can update. */
  173 + rcu_mb_needed /* Flip just completed, need an mb(). */
  174 + /* Only corresponding CPU can update. */
  175 +};
  176 +static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
  177 + = rcu_mb_done;
  178 +
  179 +/*
  180 + * RCU_DATA_ME: find the current CPU's rcu_data structure.
  181 + * RCU_DATA_CPU: find the specified CPU's rcu_data structure.
  182 + */
  183 +#define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
  184 +#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
  185 +
  186 +/*
  187 + * Helper macro for tracing when the appropriate rcu_data is not
  188 + * cached in a local variable, but where the CPU number is so cached.
  189 + */
  190 +#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
  191 +
  192 +/*
  193 + * Helper macro for tracing when the appropriate rcu_data is not
  194 + * cached in a local variable.
  195 + */
  196 +#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
  197 +
  198 +/*
  199 + * Helper macro for tracing when the appropriate rcu_data is pointed
  200 + * to by a local variable.
  201 + */
  202 +#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
  203 +
  204 +/*
  205 + * Return the number of RCU batches processed thus far. Useful
  206 + * for debug and statistics.
  207 + */
  208 +long rcu_batches_completed(void)
  209 +{
  210 + return rcu_ctrlblk.completed;
  211 +}
  212 +EXPORT_SYMBOL_GPL(rcu_batches_completed);
  213 +
  214 +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
  215 +
  216 +void __rcu_read_lock(void)
  217 +{
  218 + int idx;
  219 + struct task_struct *t = current;
  220 + int nesting;
  221 +
  222 + nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
  223 + if (nesting != 0) {
  224 +
  225 + /* An earlier rcu_read_lock() covers us, just count it. */
  226 +
  227 + t->rcu_read_lock_nesting = nesting + 1;
  228 +
  229 + } else {
  230 + unsigned long flags;
  231 +
  232 + /*
  233 + * We disable interrupts for the following reasons:
  234 + * - If we get scheduling clock interrupt here, and we
  235 + * end up acking the counter flip, it's like a promise
  236 + * that we will never increment the old counter again.
  237 + * Thus we will break that promise if that
  238 + * scheduling clock interrupt happens between the time
  239 + * we pick the .completed field and the time that we
  240 + * increment our counter.
  241 + *
  242 + * - We don't want to be preempted out here.
  243 + *
  244 + * NMIs can still occur, of course, and might themselves
  245 + * contain rcu_read_lock().
  246 + */
  247 +
  248 + local_irq_save(flags);
  249 +
  250 + /*
  251 + * Outermost nesting of rcu_read_lock(), so increment
  252 + * the current counter for the current CPU. Use volatile
  253 + * casts to prevent the compiler from reordering.
  254 + */
  255 +
  256 + idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
  257 + ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
  258 +
  259 + /*
  260 + * Now that the per-CPU counter has been incremented, we
  261 + * are protected from races with rcu_read_lock() invoked
  262 + * from NMI handlers on this CPU. We can therefore safely
  263 + * increment the nesting counter, relieving further NMIs
  264 + * of the need to increment the per-CPU counter.
  265 + */
  266 +
  267 + ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
  268 +
  269 + /*
  270 + * Now that we have prevented any NMIs from storing
  271 + * to the ->rcu_flipctr_idx, we can safely use it to
  272 + * remember which counter to decrement in the matching
  273 + * rcu_read_unlock().
  274 + */
  275 +
  276 + ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
  277 + local_irq_restore(flags);
  278 + }
  279 +}
  280 +EXPORT_SYMBOL_GPL(__rcu_read_lock);
  281 +
  282 +void __rcu_read_unlock(void)
  283 +{
  284 + int idx;
  285 + struct task_struct *t = current;
  286 + int nesting;
  287 +
  288 + nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
  289 + if (nesting > 1) {
  290 +
  291 + /*
  292 + * We are still protected by the enclosing rcu_read_lock(),
  293 + * so simply decrement the counter.
  294 + */
  295 +
  296 + t->rcu_read_lock_nesting = nesting - 1;
  297 +
  298 + } else {
  299 + unsigned long flags;
  300 +
  301 + /*
  302 + * Disable local interrupts to prevent the grace-period
  303 + * detection state machine from seeing us half-done.
  304 + * NMIs can still occur, of course, and might themselves
  305 + * contain rcu_read_lock() and rcu_read_unlock().
  306 + */
  307 +
  308 + local_irq_save(flags);
  309 +
  310 + /*
  311 + * Outermost nesting of rcu_read_unlock(), so we must
  312 + * decrement the current counter for the current CPU.
  313 + * This must be done carefully, because NMIs can
  314 + * occur at any point in this code, and any rcu_read_lock()
  315 + * and rcu_read_unlock() pairs in the NMI handlers
  316 + * must interact non-destructively with this code.
  317 + * Lots of volatile casts, and -very- careful ordering.
  318 + *
  319 + * Changes to this code, including this one, must be
  320 + * inspected, validated, and tested extremely carefully!!!
  321 + */
  322 +
  323 + /*
  324 + * First, pick up the index.
  325 + */
  326 +
  327 + idx = ACCESS_ONCE(t->rcu_flipctr_idx);
  328 +
  329 + /*
  330 + * Now that we have fetched the counter index, it is
  331 + * safe to decrement the per-task RCU nesting counter.
  332 + * After this, any interrupts or NMIs will increment and
  333 + * decrement the per-CPU counters.
  334 + */
  335 + ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
  336 +
  337 + /*
  338 + * The task's nesting count has now been decremented.
  339 + * NMIs that occur after this statement will route their
  340 + * rcu_read_lock() calls through this "else" clause, and
  341 + * will thus start incrementing the per-CPU counter on
  342 + * their own. They will also clobber ->rcu_flipctr_idx,
  343 + * but that is OK, since we have already fetched it.
  344 + */
  345 +
  346 + ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
  347 + local_irq_restore(flags);
  348 + }
  349 +}
  350 +EXPORT_SYMBOL_GPL(__rcu_read_unlock);
  351 +
  352 +/*
  353 + * If a global counter flip has occurred since the last time that we
  354 + * advanced callbacks, advance them. Hardware interrupts must be
  355 + * disabled when calling this function.
  356 + */
  357 +static void __rcu_advance_callbacks(struct rcu_data *rdp)
  358 +{
  359 + int cpu;
  360 + int i;
  361 + int wlc = 0;
  362 +
  363 + if (rdp->completed != rcu_ctrlblk.completed) {
  364 + if (rdp->waitlist[GP_STAGES - 1] != NULL) {
  365 + *rdp->donetail = rdp->waitlist[GP_STAGES - 1];
  366 + rdp->donetail = rdp->waittail[GP_STAGES - 1];
  367 + RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
  368 + }
  369 + for (i = GP_STAGES - 2; i >= 0; i--) {
  370 + if (rdp->waitlist[i] != NULL) {
  371 + rdp->waitlist[i + 1] = rdp->waitlist[i];
  372 + rdp->waittail[i + 1] = rdp->waittail[i];
  373 + wlc++;
  374 + } else {
  375 + rdp->waitlist[i + 1] = NULL;
  376 + rdp->waittail[i + 1] =
  377 + &rdp->waitlist[i + 1];
  378 + }
  379 + }
  380 + if (rdp->nextlist != NULL) {
  381 + rdp->waitlist[0] = rdp->nextlist;
  382 + rdp->waittail[0] = rdp->nexttail;
  383 + wlc++;
  384 + rdp->nextlist = NULL;
  385 + rdp->nexttail = &rdp->nextlist;
  386 + RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
  387 + } else {
  388 + rdp->waitlist[0] = NULL;
  389 + rdp->waittail[0] = &rdp->waitlist[0];
  390 + }
  391 + rdp->waitlistcount = wlc;
  392 + rdp->completed = rcu_ctrlblk.completed;
  393 + }
  394 +
  395 + /*
  396 + * Check to see if this CPU needs to report that it has seen
  397 + * the most recent counter flip, thereby declaring that all
  398 + * subsequent rcu_read_lock() invocations will respect this flip.
  399 + */
  400 +
  401 + cpu = raw_smp_processor_id();
  402 + if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
  403 + smp_mb(); /* Subsequent counter accesses must see new value */
  404 + per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
  405 + smp_mb(); /* Subsequent RCU read-side critical sections */
  406 + /* seen -after- acknowledgement. */
  407 + }
  408 +}
  409 +
  410 +/*
  411 + * Get here when RCU is idle. Decide whether we need to
  412 + * move out of idle state, and return non-zero if so.
  413 + * "Straightforward" approach for the moment, might later
  414 + * use callback-list lengths, grace-period duration, or
  415 + * some such to determine when to exit idle state.
  416 + * Might also need a pre-idle test that does not acquire
  417 + * the lock, but let's get the simple case working first...
  418 + */
  419 +
  420 +static int
  421 +rcu_try_flip_idle(void)
  422 +{
  423 + int cpu;
  424 +
  425 + RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
  426 + if (!rcu_pending(smp_processor_id())) {
  427 + RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
  428 + return 0;
  429 + }
  430 +
  431 + /*
  432 + * Do the flip.
  433 + */
  434 +
  435 + RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
  436 + rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
  437 +
  438 + /*
  439 + * Need a memory barrier so that other CPUs see the new
  440 + * counter value before they see the subsequent change of all
  441 + * the rcu_flip_flag instances to rcu_flipped.
  442 + */
  443 +
  444 + smp_mb(); /* see above block comment. */
  445 +
  446 + /* Now ask each CPU for acknowledgement of the flip. */
  447 +
  448 + for_each_possible_cpu(cpu)
  449 + per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
  450 +
  451 + return 1;
  452 +}
  453 +
  454 +/*
  455 + * Wait for CPUs to acknowledge the flip.
  456 + */
  457 +
  458 +static int
  459 +rcu_try_flip_waitack(void)
  460 +{
  461 + int cpu;
  462 +
  463 + RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
  464 + for_each_possible_cpu(cpu)
  465 + if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
  466 + RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
  467 + return 0;
  468 + }
  469 +
  470 + /*
  471 + * Make sure our checks above don't bleed into subsequent
  472 + * waiting for the sum of the counters to reach zero.
  473 + */
  474 +
  475 + smp_mb(); /* see above block comment. */
  476 + RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
  477 + return 1;
  478 +}
  479 +
  480 +/*
  481 + * Wait for collective ``last'' counter to reach zero,
  482 + * then tell all CPUs to do an end-of-grace-period memory barrier.
  483 + */
  484 +
  485 +static int
  486 +rcu_try_flip_waitzero(void)
  487 +{
  488 + int cpu;
  489 + int lastidx = !(rcu_ctrlblk.completed & 0x1);
  490 + int sum = 0;
  491 +
  492 + /* Check to see if the sum of the "last" counters is zero. */
  493 +
  494 + RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
  495 + for_each_possible_cpu(cpu)
  496 + sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
  497 + if (sum != 0) {
  498 + RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
  499 + return 0;
  500 + }
  501 +
  502 + /*
  503 + * This ensures that the other CPUs see the call for
  504 + * memory barriers -after- the sum to zero has been
  505 + * detected here
  506 + */
  507 + smp_mb(); /* ^^^^^^^^^^^^ */
  508 +
  509 + /* Call for a memory barrier from each CPU. */
  510 + for_each_possible_cpu(cpu)
  511 + per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
  512 +
  513 + RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
  514 + return 1;
  515 +}
  516 +
  517 +/*
  518 + * Wait for all CPUs to do their end-of-grace-period memory barrier.
  519 + * Return 1 once all CPUs have done so.
  520 + */
  521 +
  522 +static int
  523 +rcu_try_flip_waitmb(void)
  524 +{
  525 + int cpu;
  526 +
  527 + RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
  528 + for_each_possible_cpu(cpu)
  529 + if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
  530 + RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
  531 + return 0;
  532 + }
  533 +
  534 + smp_mb(); /* Ensure that the above checks precede any following flip. */
  535 + RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
  536 + return 1;
  537 +}
  538 +
  539 +/*
  540 + * Attempt a single flip of the counters. Remember, a single flip does
  541 + * -not- constitute a grace period. Instead, the interval between
  542 + * at least GP_STAGES consecutive flips is a grace period.
  543 + *
  544 + * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
  545 + * on a large SMP, they might want to use a hierarchical organization of
  546 + * the per-CPU-counter pairs.
  547 + */
  548 +static void rcu_try_flip(void)
  549 +{
  550 + unsigned long flags;
  551 +
  552 + RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
  553 + if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
  554 + RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
  555 + return;
  556 + }
  557 +
  558 + /*
  559 + * Take the next transition(s) through the RCU grace-period
  560 + * flip-counter state machine.
  561 + */
  562 +
  563 + switch (rcu_ctrlblk.rcu_try_flip_state) {
  564 + case rcu_try_flip_idle_state:
  565 + if (rcu_try_flip_idle())
  566 + rcu_ctrlblk.rcu_try_flip_state =
  567 + rcu_try_flip_waitack_state;
  568 + break;
  569 + case rcu_try_flip_waitack_state:
  570 + if (rcu_try_flip_waitack())
  571 + rcu_ctrlblk.rcu_try_flip_state =
  572 + rcu_try_flip_waitzero_state;
  573 + break;
  574 + case rcu_try_flip_waitzero_state:
  575 + if (rcu_try_flip_waitzero())
  576 + rcu_ctrlblk.rcu_try_flip_state =
  577 + rcu_try_flip_waitmb_state;
  578 + break;
  579 + case rcu_try_flip_waitmb_state:
  580 + if (rcu_try_flip_waitmb())
  581 + rcu_ctrlblk.rcu_try_flip_state =
  582 + rcu_try_flip_idle_state;
  583 + }
  584 + spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
  585 +}
  586 +
  587 +/*
  588 + * Check to see if this CPU needs to do a memory barrier in order to
  589 + * ensure that any prior RCU read-side critical sections have committed
  590 + * their counter manipulations and critical-section memory references
  591 + * before declaring the grace period to be completed.
  592 + */
  593 +static void rcu_check_mb(int cpu)
  594 +{
  595 + if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
  596 + smp_mb(); /* Ensure RCU read-side accesses are visible. */
  597 + per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
  598 + }
  599 +}
  600 +
  601 +void rcu_check_callbacks(int cpu, int user)
  602 +{
  603 + unsigned long flags;
  604 + struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  605 +
  606 + rcu_check_mb(cpu);
  607 + if (rcu_ctrlblk.completed == rdp->completed)
  608 + rcu_try_flip();
  609 + spin_lock_irqsave(&rdp->lock, flags);
  610 + RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
  611 + __rcu_advance_callbacks(rdp);
  612 + if (rdp->donelist == NULL) {
  613 + spin_unlock_irqrestore(&rdp->lock, flags);
  614 + } else {
  615 + spin_unlock_irqrestore(&rdp->lock, flags);
  616 + raise_softirq(RCU_SOFTIRQ);
  617 + }
  618 +}
  619 +
  620 +/*
  621 + * Needed by dynticks, to make sure all RCU processing has finished
  622 + * when we go idle:
  623 + */
  624 +void rcu_advance_callbacks(int cpu, int user)
  625 +{
  626 + unsigned long flags;
  627 + struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  628 +
  629 + if (rcu_ctrlblk.completed == rdp->completed) {
  630 + rcu_try_flip();
  631 + if (rcu_ctrlblk.completed == rdp->completed)
  632 + return;
  633 + }
  634 + spin_lock_irqsave(&rdp->lock, flags);
  635 + RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
  636 + __rcu_advance_callbacks(rdp);
  637 + spin_unlock_irqrestore(&rdp->lock, flags);
  638 +}
  639 +
  640 +static void rcu_process_callbacks(struct softirq_action *unused)
  641 +{
  642 + unsigned long flags;
  643 + struct rcu_head *next, *list;
  644 + struct rcu_data *rdp = RCU_DATA_ME();
  645 +
  646 + spin_lock_irqsave(&rdp->lock, flags);
  647 + list = rdp->donelist;
  648 + if (list == NULL) {
  649 + spin_unlock_irqrestore(&rdp->lock, flags);
  650 + return;
  651 + }
  652 + rdp->donelist = NULL;
  653 + rdp->donetail = &rdp->donelist;
  654 + RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
  655 + spin_unlock_irqrestore(&rdp->lock, flags);
  656 + while (list) {
  657 + next = list->next;
  658 + list->func(list);
  659 + list = next;
  660 + RCU_TRACE_ME(rcupreempt_trace_invoke);
  661 + }
  662 +}
  663 +
  664 +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  665 +{
  666 + unsigned long flags;
  667 + struct rcu_data *rdp;
  668 +
  669 + head->func = func;
  670 + head->next = NULL;
  671 + local_irq_save(flags);
  672 + rdp = RCU_DATA_ME();
  673 + spin_lock(&rdp->lock);
  674 + __rcu_advance_callbacks(rdp);
  675 + *rdp->nexttail = head;
  676 + rdp->nexttail = &head->next;
  677 + RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
  678 + spin_unlock(&rdp->lock);
  679 + local_irq_restore(flags);
  680 +}
  681 +EXPORT_SYMBOL_GPL(call_rcu);
  682 +
  683 +/*
  684 + * Wait until all currently running preempt_disable() code segments
  685 + * (including hardware-irq-disable segments) complete. Note that
  686 + * in -rt this does -not- necessarily result in all currently executing
  687 + * interrupt -handlers- having completed.
  688 + */
  689 +void __synchronize_sched(void)
  690 +{
  691 + cpumask_t oldmask;
  692 + int cpu;
  693 +
  694 + if (sched_getaffinity(0, &oldmask) < 0)
  695 + oldmask = cpu_possible_map;
  696 + for_each_online_cpu(cpu) {
  697 + sched_setaffinity(0, cpumask_of_cpu(cpu));
  698 + schedule();
  699 + }
  700 + sched_setaffinity(0, oldmask);
  701 +}
  702 +EXPORT_SYMBOL_GPL(__synchronize_sched);
  703 +
  704 +/*
  705 + * Check to see if any future RCU-related work will need to be done
  706 + * by the current CPU, even if none need be done immediately, returning
  707 + * 1 if so. Assumes that notifiers would take care of handling any
  708 + * outstanding requests from the RCU core.
  709 + *
  710 + * This function is part of the RCU implementation; it is -not-
  711 + * an exported member of the RCU API.
  712 + */
  713 +int rcu_needs_cpu(int cpu)
  714 +{
  715 + struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  716 +
  717 + return (rdp->donelist != NULL ||
  718 + !!rdp->waitlistcount ||
  719 + rdp->nextlist != NULL);
  720 +}
  721 +
  722 +int rcu_pending(int cpu)
  723 +{
  724 + struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  725 +
  726 + /* The CPU has at least one callback queued somewhere. */
  727 +
  728 + if (rdp->donelist != NULL ||
  729 + !!rdp->waitlistcount ||
  730 + rdp->nextlist != NULL)
  731 + return 1;
  732 +
  733 + /* The RCU core needs an acknowledgement from this CPU. */
  734 +
  735 + if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
  736 + (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
  737 + return 1;
  738 +
  739 + /* This CPU has fallen behind the global grace-period number. */
  740 +
  741 + if (rdp->completed != rcu_ctrlblk.completed)
  742 + return 1;
  743 +
  744 + /* Nothing needed from this CPU. */
  745 +
  746 + return 0;
  747 +}
  748 +
  749 +void __init __rcu_init(void)
  750 +{
  751 + int cpu;
  752 + int i;
  753 + struct rcu_data *rdp;
  754 +
  755 + printk(KERN_NOTICE "Preemptible RCU implementation.\n");
  756 + for_each_possible_cpu(cpu) {
  757 + rdp = RCU_DATA_CPU(cpu);
  758 + spin_lock_init(&rdp->lock);
  759 + rdp->completed = 0;
  760 + rdp->waitlistcount = 0;
  761 + rdp->nextlist = NULL;
  762 + rdp->nexttail = &rdp->nextlist;
  763 + for (i = 0; i < GP_STAGES; i++) {
  764 + rdp->waitlist[i] = NULL;
  765 + rdp->waittail[i] = &rdp->waitlist[i];
  766 + }
  767 + rdp->donelist = NULL;
  768 + rdp->donetail = &rdp->donelist;
  769 + rdp->rcu_flipctr[0] = 0;
  770 + rdp->rcu_flipctr[1] = 0;
  771 + }
  772 + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
  773 +}
  774 +
  775 +/*
  776 + * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
  777 + */
  778 +void synchronize_kernel(void)
  779 +{
  780 + synchronize_rcu();
  781 +}
  782 +
  783 +#ifdef CONFIG_RCU_TRACE
  784 +long *rcupreempt_flipctr(int cpu)
  785 +{
  786 + return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
  787 +}
  788 +EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
  789 +
  790 +int rcupreempt_flip_flag(int cpu)
  791 +{
  792 + return per_cpu(rcu_flip_flag, cpu);
  793 +}
  794 +EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
  795 +
  796 +int rcupreempt_mb_flag(int cpu)
  797 +{
  798 + return per_cpu(rcu_mb_flag, cpu);
  799 +}
  800 +EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
  801 +
  802 +char *rcupreempt_try_flip_state_name(void)
  803 +{
  804 + return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
  805 +}
  806 +EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
  807 +
  808 +struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
  809 +{
  810 + struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  811 +
  812 + return &rdp->trace;
  813 +}
  814 +EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
  815 +
  816 +#endif /* #ifdef CONFIG_RCU_TRACE */
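
For context, the RCU API seen by its users does not change with this
implementation; a hypothetical caller written along the usual lines, such
as the sketch below, simply gets preemptible read-side critical sections
when CONFIG_PREEMPT_RCU=y (sketch only, not part of the patch):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

static struct foo *global_foo;

/* Reader: with CONFIG_PREEMPT_RCU the critical section may be preempted. */
static int foo_read(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(global_foo);
	if (p)
		val = p->data;
	rcu_read_unlock();
	return val;
}

static void foo_reclaim(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

/*
 * Updater (serialized by the caller): the old version is queued on this
 * CPU's ->nextlist by call_rcu() above and freed after a grace period.
 */
static void foo_update(struct foo *newp)
{
	struct foo *old = global_foo;

	rcu_assign_pointer(global_foo, newp);
	if (old)
		call_rcu(&old->rcu, foo_reclaim);
}
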
kernel/rcupreempt_trace.c
  1 +/*
  2 + * Read-Copy Update tracing for realtime implementation
  3 + *
  4 + * This program is free software; you can redistribute it and/or modify
  5 + * it under the terms of the GNU General Public License as published by
  6 + * the Free Software Foundation; either version 2 of the License, or
  7 + * (at your option) any later version.
  8 + *
  9 + * This program is distributed in the hope that it will be useful,
  10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12 + * GNU General Public License for more details.
  13 + *
  14 + * You should have received a copy of the GNU General Public License
  15 + * along with this program; if not, write to the Free Software
  16 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 + *
  18 + * Copyright IBM Corporation, 2006
  19 + *
  20 + * Papers: http://www.rdrop.com/users/paulmck/RCU
  21 + *
  22 + * For detailed explanation of Read-Copy Update mechanism see -
  23 + * Documentation/RCU/ *.txt
  24 + *
  25 + */
  26 +#include <linux/types.h>
  27 +#include <linux/kernel.h>
  28 +#include <linux/init.h>
  29 +#include <linux/spinlock.h>
  30 +#include <linux/smp.h>
  31 +#include <linux/rcupdate.h>
  32 +#include <linux/interrupt.h>
  33 +#include <linux/sched.h>
  34 +#include <asm/atomic.h>
  35 +#include <linux/bitops.h>
  36 +#include <linux/module.h>
  37 +#include <linux/completion.h>
  38 +#include <linux/moduleparam.h>
  39 +#include <linux/percpu.h>
  40 +#include <linux/notifier.h>
  41 +#include <linux/rcupdate.h>
  42 +#include <linux/cpu.h>
  43 +#include <linux/mutex.h>
  44 +#include <linux/rcupreempt_trace.h>
  45 +#include <linux/debugfs.h>
  46 +
  47 +static struct mutex rcupreempt_trace_mutex;
  48 +static char *rcupreempt_trace_buf;
  49 +#define RCUPREEMPT_TRACE_BUF_SIZE 4096
  50 +
  51 +void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
  52 +{
  53 + trace->done_length += trace->wait_length;
  54 + trace->done_add += trace->wait_length;
  55 + trace->wait_length = 0;
  56 +}
  57 +void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace)
  58 +{
  59 + trace->wait_length += trace->next_length;
  60 + trace->wait_add += trace->next_length;
  61 + trace->next_length = 0;
  62 +}
  63 +void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace)
  64 +{
  65 + atomic_inc(&trace->rcu_try_flip_1);
  66 +}
  67 +void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace)
  68 +{
  69 + atomic_inc(&trace->rcu_try_flip_e1);
  70 +}
  71 +void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace)
  72 +{
  73 + trace->rcu_try_flip_i1++;
  74 +}
  75 +void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace)
  76 +{
  77 + trace->rcu_try_flip_ie1++;
  78 +}
  79 +void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace)
  80 +{
  81 + trace->rcu_try_flip_g1++;
  82 +}
  83 +void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace)
  84 +{
  85 + trace->rcu_try_flip_a1++;
  86 +}
  87 +void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace)
  88 +{
  89 + trace->rcu_try_flip_ae1++;
  90 +}
  91 +void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace)
  92 +{
  93 + trace->rcu_try_flip_a2++;
  94 +}
  95 +void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace)
  96 +{
  97 + trace->rcu_try_flip_z1++;
  98 +}
  99 +void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace)
  100 +{
  101 + trace->rcu_try_flip_ze1++;
  102 +}
  103 +void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace)
  104 +{
  105 + trace->rcu_try_flip_z2++;
  106 +}
  107 +void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace)
  108 +{
  109 + trace->rcu_try_flip_m1++;
  110 +}
  111 +void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace)
  112 +{
  113 + trace->rcu_try_flip_me1++;
  114 +}
  115 +void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace)
  116 +{
  117 + trace->rcu_try_flip_m2++;
  118 +}
  119 +void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace)
  120 +{
  121 + trace->rcu_check_callbacks++;
  122 +}
  123 +void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace)
  124 +{
  125 + trace->done_remove += trace->done_length;
  126 + trace->done_length = 0;
  127 +}
  128 +void rcupreempt_trace_invoke(struct rcupreempt_trace *trace)
  129 +{
  130 + atomic_inc(&trace->done_invoked);
  131 +}
  132 +void rcupreempt_trace_next_add(struct rcupreempt_trace *trace)
  133 +{
  134 + trace->next_add++;
  135 + trace->next_length++;
  136 +}
  137 +
  138 +static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
  139 +{
  140 + struct rcupreempt_trace *cp;
  141 + int cpu;
  142 +
  143 + memset(sp, 0, sizeof(*sp));
  144 + for_each_possible_cpu(cpu) {
  145 + cp = rcupreempt_trace_cpu(cpu);
  146 + sp->next_length += cp->next_length;
  147 + sp->next_add += cp->next_add;
  148 + sp->wait_length += cp->wait_length;
  149 + sp->wait_add += cp->wait_add;
  150 + sp->done_length += cp->done_length;
  151 + sp->done_add += cp->done_add;
  152 + sp->done_remove += cp->done_remove;
  153 + atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
  154 + sp->rcu_check_callbacks += cp->rcu_check_callbacks;
  155 + atomic_set(&sp->rcu_try_flip_1,
  156 + atomic_read(&cp->rcu_try_flip_1));
  157 + atomic_set(&sp->rcu_try_flip_e1,
  158 + atomic_read(&cp->rcu_try_flip_e1));
  159 + sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
  160 + sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
  161 + sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
  162 + sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1;
  163 + sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1;
  164 + sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2;
  165 + sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1;
  166 + sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1;
  167 + sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2;
  168 + sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1;
  169 + sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1;
  170 + sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2;
  171 + }
  172 +}
  173 +
  174 +static ssize_t rcustats_read(struct file *filp, char __user *buffer,
  175 + size_t count, loff_t *ppos)
  176 +{
  177 + struct rcupreempt_trace trace;
  178 + ssize_t bcount;
  179 + int cnt = 0;
  180 +
  181 + rcupreempt_trace_sum(&trace);
  182 + mutex_lock(&rcupreempt_trace_mutex);
  183 + snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
  184 + "ggp=%ld rcc=%ld\n",
  185 + rcu_batches_completed(),
  186 + trace.rcu_check_callbacks);
  187 + snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt,
  188 + "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n"
  189 + "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n"
  190 + "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n",
  191 +
  192 + trace.next_add, trace.next_length,
  193 + trace.wait_add, trace.wait_length,
  194 + trace.done_add, trace.done_length,
  195 + trace.done_remove, atomic_read(&trace.done_invoked),
  196 + atomic_read(&trace.rcu_try_flip_1),
  197 + atomic_read(&trace.rcu_try_flip_e1),
  198 + trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1,
  199 + trace.rcu_try_flip_g1,
  200 + trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1,
  201 + trace.rcu_try_flip_a2,
  202 + trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1,
  203 + trace.rcu_try_flip_z2,
  204 + trace.rcu_try_flip_m1, trace.rcu_try_flip_me1,
  205 + trace.rcu_try_flip_m2);
  206 + bcount = simple_read_from_buffer(buffer, count, ppos,
  207 + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
  208 + mutex_unlock(&rcupreempt_trace_mutex);
  209 + return bcount;
  210 +}
  211 +
  212 +static ssize_t rcugp_read(struct file *filp, char __user *buffer,
  213 + size_t count, loff_t *ppos)
  214 +{
  215 + long oldgp = rcu_batches_completed();
  216 + ssize_t bcount;
  217 +
  218 + mutex_lock(&rcupreempt_trace_mutex);
  219 + synchronize_rcu();
  220 + snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE,
  221 + "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed());
  222 + bcount = simple_read_from_buffer(buffer, count, ppos,
  223 + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
  224 + mutex_unlock(&rcupreempt_trace_mutex);
  225 + return bcount;
  226 +}
  227 +
  228 +static ssize_t rcuctrs_read(struct file *filp, char __user *buffer,
  229 + size_t count, loff_t *ppos)
  230 +{
  231 + int cnt = 0;
  232 + int cpu;
  233 + int f = rcu_batches_completed() & 0x1;
  234 + ssize_t bcount;
  235 +
  236 + mutex_lock(&rcupreempt_trace_mutex);
  237 +
  238 + cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE,
  239 + "CPU last cur F M\n");
  240 + for_each_online_cpu(cpu) {
  241 + long *flipctr = rcupreempt_flipctr(cpu);
  242 + cnt += snprintf(&rcupreempt_trace_buf[cnt],
  243 + RCUPREEMPT_TRACE_BUF_SIZE - cnt,
  244 + "%3d %4ld %3ld %d %d\n",
  245 + cpu,
  246 + flipctr[!f],
  247 + flipctr[f],
  248 + rcupreempt_flip_flag(cpu),
  249 + rcupreempt_mb_flag(cpu));
  250 + }
  251 + cnt += snprintf(&rcupreempt_trace_buf[cnt],
  252 + RCUPREEMPT_TRACE_BUF_SIZE - cnt,
  253 + "ggp = %ld, state = %s\n",
  254 + rcu_batches_completed(),
  255 + rcupreempt_try_flip_state_name());
  256 + cnt += snprintf(&rcupreempt_trace_buf[cnt],
  257 + RCUPREEMPT_TRACE_BUF_SIZE - cnt,
  258 + "\n");
  259 + bcount = simple_read_from_buffer(buffer, count, ppos,
  260 + rcupreempt_trace_buf, strlen(rcupreempt_trace_buf));
  261 + mutex_unlock(&rcupreempt_trace_mutex);
  262 + return bcount;
  263 +}
  264 +
  265 +static struct file_operations rcustats_fops = {
  266 + .owner = THIS_MODULE,
  267 + .read = rcustats_read,
  268 +};
  269 +
  270 +static struct file_operations rcugp_fops = {
  271 + .owner = THIS_MODULE,
  272 + .read = rcugp_read,
  273 +};
  274 +
  275 +static struct file_operations rcuctrs_fops = {
  276 + .owner = THIS_MODULE,
  277 + .read = rcuctrs_read,
  278 +};
  279 +
  280 +static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir;
  281 +static int rcupreempt_debugfs_init(void)
  282 +{
  283 + rcudir = debugfs_create_dir("rcu", NULL);
  284 + if (!rcudir)
  285 + goto out;
  286 + statdir = debugfs_create_file("rcustats", 0444, rcudir,
  287 + NULL, &rcustats_fops);
  288 + if (!statdir)
  289 + goto free_out;
  290 +
  291 + gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
  292 + if (!gpdir)
  293 + goto free_out;
  294 +
  295 + ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir,
  296 + NULL, &rcuctrs_fops);
  297 + if (!ctrsdir)
  298 + goto free_out;
  299 + return 0;
  300 +free_out:
  301 + if (statdir)
  302 + debugfs_remove(statdir);
  303 + if (gpdir)
  304 + debugfs_remove(gpdir);
  305 + debugfs_remove(rcudir);
  306 +out:
  307 + return 1;
  308 +}
  309 +
  310 +static int __init rcupreempt_trace_init(void)
  311 +{
  312 + mutex_init(&rcupreempt_trace_mutex);
  313 + rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
  314 + if (!rcupreempt_trace_buf)
  315 + return 1;
  316 + return rcupreempt_debugfs_init();
  317 +}
  318 +
  319 +static void __exit rcupreempt_trace_cleanup(void)
  320 +{
  321 + debugfs_remove(statdir);
  322 + debugfs_remove(gpdir);
  323 + debugfs_remove(ctrsdir);
  324 + debugfs_remove(rcudir);
  325 + kfree(rcupreempt_trace_buf);
  326 +}
  327 +
  328 +
  329 +module_init(rcupreempt_trace_init);
  330 +module_exit(rcupreempt_trace_cleanup);