Commit 7b2e6011f150c42235c4a541d20cf6891afe878a

Authored by Paul E. McKenney
Committed by Paul E. McKenney
1 parent 489832609a

rcu: Rename ->onofflock to ->orphan_lock

The ->onofflock field in the rcu_state structure at one time synchronized
CPU-hotplug operations for RCU.  However, its scope has decreased over time
so that it now only protects the lists of orphaned RCU callbacks.  This
commit therefore renames it to ->orphan_lock to reflect its current use.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
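
For readers unfamiliar with the orphaned-callback handoff that ->orphan_lock now protects, here is a minimal user-space model of that idea. This is an illustrative sketch only, not code from this commit: the names (orphan_state, cb, send_cbs_to_orphanage, adopt_orphan_cbs) are invented for the example, and a pthread mutex stands in for the kernel's raw spinlock. The point is simply that the lock's only remaining job is to serialize access to the orphan lists while callbacks are handed from an outgoing CPU to a surviving one.

/*
 * Illustrative user-space model only -- NOT the kernel's code.  The names
 * below are invented for the example, and a pthread mutex stands in for
 * the kernel's raw spinlock.  The kernel actually keeps two such lists
 * (not-yet-ready and ready-to-invoke); one is enough to show the idea.
 */
#include <pthread.h>
#include <stdio.h>

struct cb {                             /* stand-in for struct rcu_head */
        void (*func)(struct cb *);
        struct cb *next;
};

struct orphan_state {
        pthread_mutex_t orphan_lock;    /* protects only the list below */
        struct cb *orphan_list;         /* callbacks awaiting adoption */
};

static struct orphan_state os = {
        .orphan_lock = PTHREAD_MUTEX_INITIALIZER,
};

/* An outgoing "CPU" donates its pending callbacks to the orphan list. */
static void send_cbs_to_orphanage(struct cb *head)
{
        pthread_mutex_lock(&os.orphan_lock);
        while (head) {
                struct cb *next = head->next;

                head->next = os.orphan_list;
                os.orphan_list = head;
                head = next;
        }
        pthread_mutex_unlock(&os.orphan_lock);
}

/* A surviving "CPU" adopts everything currently orphaned. */
static struct cb *adopt_orphan_cbs(void)
{
        struct cb *list;

        pthread_mutex_lock(&os.orphan_lock);
        list = os.orphan_list;
        os.orphan_list = NULL;
        pthread_mutex_unlock(&os.orphan_lock);
        return list;
}

static void print_cb(struct cb *c)
{
        printf("invoking callback %p\n", (void *)c);
}

int main(void)
{
        struct cb a = { .func = print_cb };
        struct cb b = { .func = print_cb, .next = &a };
        struct cb *adopted;

        send_cbs_to_orphanage(&b);
        for (adopted = adopt_orphan_cbs(); adopted; adopted = adopted->next)
                adopted->func(adopted);
        return 0;
}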

Showing 3 changed files with 11 additions and 11 deletions

/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2008
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *          Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *      Documentation/RCU
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>

#include "rcutree.h"
#include <trace/events/rcu.h>

#include "rcu.h"

/* Data structures. */

static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];

#define RCU_STATE_INITIALIZER(sname, cr) { \
        .level = { &sname##_state.node[0] }, \
        .call = cr, \
        .fqs_state = RCU_GP_IDLE, \
        .gpnum = -300, \
        .completed = -300, \
-       .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+       .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
        .orphan_nxttail = &sname##_state.orphan_nxtlist, \
        .orphan_donetail = &sname##_state.orphan_donelist, \
        .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
        .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
        .name = #sname, \
}

struct rcu_state rcu_sched_state =
        RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);

struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);

static struct rcu_state *rcu_state;
LIST_HEAD(rcu_struct_flavors);

/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
static int num_rcu_lvl[] = {  /* Number of rcu_nodes at specified level. */
        NUM_RCU_LVL_0,
        NUM_RCU_LVL_1,
        NUM_RCU_LVL_2,
        NUM_RCU_LVL_3,
        NUM_RCU_LVL_4,
};
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */

/*
 * The rcu_scheduler_active variable transitions from zero to one just
 * before the first task is spawned.  So when this variable is zero, RCU
 * can assume that there is but one task, allowing RCU to (for example)
 * optimize synchronize_sched() to a simple barrier().  When this variable
 * is one, RCU must actually do all the hard work required to detect real
 * grace periods.  This variable is also used to suppress boot-time false
 * positives from lockdep-RCU error checking.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during the early_initcall() processing, which is after the scheduler
 * is capable of creating new tasks.  So RCU processing (for example,
 * creating tasks for RCU priority boosting) must be delayed until after
 * rcu_scheduler_fully_active transitions from zero to one.  We also
 * currently delay invocation of any RCU callbacks until after this point.
 *
 * It might later prove better for people registering RCU callbacks during
 * early boot to take responsibility for these callbacks, but one step at
 * a time.
 */
static int rcu_scheduler_fully_active __read_mostly;

#ifdef CONFIG_RCU_BOOST

/*
 * Control variables for per-CPU and per-rcu_node kthreads.  These
 * handle all flavors of RCU.
 */
static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);

#endif /* #ifdef CONFIG_RCU_BOOST */

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);

/*
 * Track the rcutorture test sequence number and the update version
 * number within a given test.  The rcutorture_testseq is incremented
 * on every rcutorture module load and unload, so has an odd value
 * when a test is running.  The rcutorture_vernum is set to zero
 * when rcutorture starts and is incremented on each rcutorture update.
 * These variables enable correlating rcutorture output with the
 * RCU tracing information.
 */
unsigned long rcutorture_testseq;
unsigned long rcutorture_vernum;

/*
 * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock, but of course results can be subject to change.
 */
static int rcu_gp_in_progress(struct rcu_state *rsp)
{
        return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
}

/*
 * Note a quiescent state.  Because we do not need to know
 * how many quiescent states passed, just if there was at least
 * one since the start of the grace period, this just sets a flag.
 * The caller must have disabled preemption.
 */
void rcu_sched_qs(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);

        if (rdp->passed_quiesce == 0)
                trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
        rdp->passed_quiesce = 1;
}

void rcu_bh_qs(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);

        if (rdp->passed_quiesce == 0)
                trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
        rdp->passed_quiesce = 1;
}

/*
 * Note a context switch.  This is a quiescent state for RCU-sched,
 * and requires special handling for preemptible RCU.
 * The caller must have disabled preemption.
 */
void rcu_note_context_switch(int cpu)
{
        trace_rcu_utilization("Start context switch");
        rcu_sched_qs(cpu);
        rcu_preempt_note_context_switch(cpu);
        trace_rcu_utilization("End context switch");
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
        .ignore_user_qs = true,
#endif
};

static int blimit = 10;         /* Maximum callbacks per rcu_do_batch. */
static int qhimark = 10000;     /* If this many pending, ignore blimit. */
static int qlowmark = 100;      /* Once only this many pending, use blimit. */

module_param(blimit, int, 0444);
module_param(qhimark, int, 0444);
module_param(qlowmark, int, 0444);

int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;

module_param(rcu_cpu_stall_suppress, int, 0644);
module_param(rcu_cpu_stall_timeout, int, 0644);

static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS;
static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;

module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);

static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
static void force_quiescent_state(struct rcu_state *rsp);
static int rcu_pending(int cpu);

/*
 * Return the number of RCU-sched batches processed thus far for debug & stats.
 */
long rcu_batches_completed_sched(void)
{
        return rcu_sched_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);

/*
 * Return the number of RCU BH batches processed thus far for debug & stats.
 */
long rcu_batches_completed_bh(void)
{
        return rcu_bh_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);

/*
 * Force a quiescent state for RCU BH.
 */
void rcu_bh_force_quiescent_state(void)
{
        force_quiescent_state(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);

/*
 * Record the number of times rcutorture tests have been initiated and
 * terminated.  This information allows the debugfs tracing stats to be
 * correlated to the rcutorture messages, even when the rcutorture module
 * is being repeatedly loaded and unloaded.  In other words, we cannot
 * store this state in rcutorture itself.
 */
void rcutorture_record_test_transition(void)
{
        rcutorture_testseq++;
        rcutorture_vernum = 0;
}
EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);

/*
 * Record the number of writer passes through the current rcutorture test.
 * This is also used to correlate debugfs tracing stats with the rcutorture
 * messages.
 */
void rcutorture_record_progress(unsigned long vernum)
{
        rcutorture_vernum++;
}
EXPORT_SYMBOL_GPL(rcutorture_record_progress);

/*
 * Force a quiescent state for RCU-sched.
 */
void rcu_sched_force_quiescent_state(void)
{
        force_quiescent_state(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);

/*
 * Does the CPU have callbacks ready to be invoked?
 */
static int
cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
{
        return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
}

/*
 * Does the current CPU require a yet-as-unscheduled grace period?
 */
static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
        return *rdp->nxttail[RCU_DONE_TAIL +
                             ACCESS_ONCE(rsp->completed) != rdp->completed] &&
               !rcu_gp_in_progress(rsp);
}

/*
 * Return the root node of the specified rcu_state structure.
 */
static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
{
        return &rsp->node[0];
}

/*
 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
 *
 * If the new value of the ->dynticks_nesting counter now is zero,
 * we really have entered idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
                                 bool user)
{
        trace_rcu_dyntick("Start", oldval, 0);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());

                trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
                          current->pid, current->comm,
                          idle->pid, idle->comm); /* must be idle task! */
        }
        rcu_prepare_for_idle(smp_processor_id());
        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
        smp_mb__before_atomic_inc();  /* See above. */
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);

        /*
         * It is illegal to enter an extended quiescent state while
         * in an RCU read-side critical section.
         */
        rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
                           "Illegal idle entry in RCU read-side critical section.");
        rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
                           "Illegal idle entry in RCU-bh read-side critical section.");
        rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
                           "Illegal idle entry in RCU-sched read-side critical section.");
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 */
static void rcu_eqs_enter(bool user)
{
        long long oldval;
        struct rcu_dynticks *rdtp;

        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
                rdtp->dynticks_nesting = 0;
        else
                rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
        rcu_eqs_enter_common(rdtp, oldval, user);
}

/**
 * rcu_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur.  (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * We crowbar the ->dynticks_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
void rcu_idle_enter(void)
{
        unsigned long flags;

        local_irq_save(flags);
        rcu_eqs_enter(false);
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);

#ifdef CONFIG_RCU_USER_QS
/**
 * rcu_user_enter - inform RCU that we are resuming userspace.
 *
 * Enter RCU idle mode right before resuming userspace.  No use of RCU
 * is permitted between this call and rcu_user_exit(). This way the
 * CPU doesn't need to maintain the tick for RCU maintenance purposes
 * when the CPU runs in userspace.
 */
void rcu_user_enter(void)
{
        unsigned long flags;
        struct rcu_dynticks *rdtp;

        /*
         * Some contexts may involve an exception occuring in an irq,
         * leading to that nesting:
         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
         * helpers are enough to protect RCU uses inside the exception. So
         * just return immediately if we detect we are in an IRQ.
         */
        if (in_interrupt())
                return;

        WARN_ON_ONCE(!current->mm);

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        if (!rdtp->ignore_user_qs && !rdtp->in_user) {
                rdtp->in_user = true;
                rcu_eqs_enter(true);
        }
        local_irq_restore(flags);
}

/**
 * rcu_user_enter_after_irq - inform RCU that we are going to resume userspace
 * after the current irq returns.
 *
 * This is similar to rcu_user_enter() but in the context of a non-nesting
 * irq. After this call, RCU enters into idle mode when the interrupt
 * returns.
 */
void rcu_user_enter_after_irq(void)
{
        unsigned long flags;
        struct rcu_dynticks *rdtp;

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        /* Ensure this irq is interrupting a non-idle RCU state.  */
        WARN_ON_ONCE(!(rdtp->dynticks_nesting & DYNTICK_TASK_MASK));
        rdtp->dynticks_nesting = 1;
        local_irq_restore(flags);
}
#endif /* CONFIG_RCU_USER_QS */

/**
 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit().  If your
 * architecture violates this assumption, RCU will give you what you
 * deserve, good and hard.  But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 */
void rcu_irq_exit(void)
{
        unsigned long flags;
        long long oldval;
        struct rcu_dynticks *rdtp;

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting--;
        WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
        if (rdtp->dynticks_nesting)
                trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_enter_common(rdtp, oldval, true);
        local_irq_restore(flags);
}

/*
 * rcu_eqs_exit_common - current CPU moving away from extended quiescent state
 *
 * If the new value of the ->dynticks_nesting counter was previously zero,
 * we really have exited idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */
static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
                                int user)
{
        smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
        atomic_inc(&rdtp->dynticks);
        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
        smp_mb__after_atomic_inc();  /* See above. */
        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
        rcu_cleanup_after_idle(smp_processor_id());
        trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());

                trace_rcu_dyntick("Error on exit: not idle task",
                                  oldval, rdtp->dynticks_nesting);
                ftrace_dump(DUMP_ORIG);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
                          current->pid, current->comm,
                          idle->pid, idle->comm); /* must be idle task! */
        }
}

/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 */
static void rcu_eqs_exit(bool user)
{
        struct rcu_dynticks *rdtp;
        long long oldval;

        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE(oldval < 0);
        if (oldval & DYNTICK_TASK_NEST_MASK)
                rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
        else
                rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
        rcu_eqs_exit_common(rdtp, oldval, user);
}

/**
 * rcu_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
 * allow for the possibility of usermode upcalls messing up our count
 * of interrupt nesting level during the busy period that is just
 * now starting.
 */
void rcu_idle_exit(void)
{
        unsigned long flags;

        local_irq_save(flags);
        rcu_eqs_exit(false);
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);

#ifdef CONFIG_RCU_USER_QS
/**
 * rcu_user_exit - inform RCU that we are exiting userspace.
 *
 * Exit RCU idle mode while entering the kernel because it can
 * run a RCU read side critical section anytime.
 */
void rcu_user_exit(void)
{
        unsigned long flags;
        struct rcu_dynticks *rdtp;

        /*
         * Some contexts may involve an exception occuring in an irq,
         * leading to that nesting:
         * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
         * This would mess up the dyntick_nesting count though. And rcu_irq_*()
         * helpers are enough to protect RCU uses inside the exception. So
         * just return immediately if we detect we are in an IRQ.
         */
        if (in_interrupt())
                return;

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        if (rdtp->in_user) {
                rdtp->in_user = false;
                rcu_eqs_exit(true);
        }
        local_irq_restore(flags);
}

/**
 * rcu_user_exit_after_irq - inform RCU that we won't resume to userspace
 * idle mode after the current non-nesting irq returns.
 *
 * This is similar to rcu_user_exit() but in the context of an irq.
 * This is called when the irq has interrupted a userspace RCU idle mode
 * context. When the current non-nesting interrupt returns after this call,
 * the CPU won't restore the RCU idle mode.
 */
void rcu_user_exit_after_irq(void)
{
        unsigned long flags;
        struct rcu_dynticks *rdtp;

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        /* Ensure we are interrupting an RCU idle mode. */
        WARN_ON_ONCE(rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK);
        rdtp->dynticks_nesting += DYNTICK_TASK_EXIT_IDLE;
        local_irq_restore(flags);
}
#endif /* CONFIG_RCU_USER_QS */

/**
 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to
 * user mode!  This code assumes that the idle loop never does upcalls to
 * user mode.  If your architecture does do upcalls from the idle loop (or
 * does anything else that results in unbalanced calls to the irq_enter()
 * and irq_exit() functions), RCU will give you what you deserve, good
 * and hard.  But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 */
void rcu_irq_enter(void)
{
        unsigned long flags;
        struct rcu_dynticks *rdtp;
        long long oldval;

        local_irq_save(flags);
        rdtp = &__get_cpu_var(rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting++;
        WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
        if (oldval)
                trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
        else
                rcu_eqs_exit_common(rdtp, oldval, true);
        local_irq_restore(flags);
}

/**
 * rcu_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle with dynamic ticks active, and there is no
 * irq handler running, this updates rdtp->dynticks_nmi to let the
 * RCU grace-period handling know that the CPU is active.
 */
void rcu_nmi_enter(void)
{
        struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);

        if (rdtp->dynticks_nmi_nesting == 0 &&
            (atomic_read(&rdtp->dynticks) & 0x1))
                return;
        rdtp->dynticks_nmi_nesting++;
        smp_mb__before_atomic_inc();  /* Force delay from prior write. */
        atomic_inc(&rdtp->dynticks);
        /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
        smp_mb__after_atomic_inc();  /* See above. */
        WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
}

/**
 * rcu_nmi_exit - inform RCU of exit from NMI context
 *
 * If the CPU was idle with dynamic ticks active, and there is no
 * irq handler running, this updates rdtp->dynticks_nmi to let the
 * RCU grace-period handling know that the CPU is no longer active.
 */
void rcu_nmi_exit(void)
{
        struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);

        if (rdtp->dynticks_nmi_nesting == 0 ||
            --rdtp->dynticks_nmi_nesting != 0)
                return;
        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
        smp_mb__before_atomic_inc();  /* See above. */
        atomic_inc(&rdtp->dynticks);
        smp_mb__after_atomic_inc();  /* Force delay to next write. */
        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
}

/**
 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
 *
 * If the current CPU is in its idle loop and is neither in an interrupt
 * or NMI handler, return true.
 */
int rcu_is_cpu_idle(void)
{
        int ret;

        preempt_disable();
        ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(rcu_is_cpu_idle);

#ifdef CONFIG_RCU_USER_QS
void rcu_user_hooks_switch(struct task_struct *prev,
                           struct task_struct *next)
{
        struct rcu_dynticks *rdtp;

        /* Interrupts are disabled in context switch */
        rdtp = &__get_cpu_var(rcu_dynticks);
        if (!rdtp->ignore_user_qs) {
                clear_tsk_thread_flag(prev, TIF_NOHZ);
                set_tsk_thread_flag(next, TIF_NOHZ);
        }
}
#endif /* #ifdef CONFIG_RCU_USER_QS */

#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)

/*
 * Is the current CPU online?  Disable preemption to avoid false positives
 * that could otherwise happen due to the current CPU number being sampled,
 * this task being preempted, its old CPU being taken offline, resuming
 * on some other CPU, then determining that its old CPU is now offline.
 * It is OK to use RCU on an offline processor during initial boot, hence
 * the check for rcu_scheduler_fully_active.  Note also that it is OK
 * for a CPU coming online to use RCU for one jiffy prior to marking itself
 * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
 * offline to continue to use RCU for one jiffy after marking itself
 * offline in the cpu_online_mask.  This leniency is necessary given the
 * non-atomic nature of the online and offline processing, for example,
 * the fact that a CPU enters the scheduler after completing the CPU_DYING
 * notifiers.
 *
 * This is also why RCU internally marks CPUs online during the
 * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
 *
 * Disable checking if in an NMI handler because we cannot safely report
 * errors from NMI handlers anyway.
 */
bool rcu_lockdep_current_cpu_online(void)
{
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        bool ret;

        if (in_nmi())
                return 1;
        preempt_disable();
        rdp = &__get_cpu_var(rcu_sched_data);
        rnp = rdp->mynode;
        ret = (rdp->grpmask & rnp->qsmaskinit) ||
              !rcu_scheduler_fully_active;
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);

#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */

779 /** 779 /**
780 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle 780 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
781 * 781 *
782 * If the current CPU is idle or running at a first-level (not nested) 782 * If the current CPU is idle or running at a first-level (not nested)
783 * interrupt from idle, return true. The caller must have at least 783 * interrupt from idle, return true. The caller must have at least
784 * disabled preemption. 784 * disabled preemption.
785 */ 785 */
786 int rcu_is_cpu_rrupt_from_idle(void) 786 int rcu_is_cpu_rrupt_from_idle(void)
787 { 787 {
788 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; 788 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
789 } 789 }
790 790
791 /* 791 /*
792 * Snapshot the specified CPU's dynticks counter so that we can later 792 * Snapshot the specified CPU's dynticks counter so that we can later
793 * credit them with an implicit quiescent state. Return 1 if this CPU 793 * credit them with an implicit quiescent state. Return 1 if this CPU
794 * is in dynticks idle mode, which is an extended quiescent state. 794 * is in dynticks idle mode, which is an extended quiescent state.
795 */ 795 */
796 static int dyntick_save_progress_counter(struct rcu_data *rdp) 796 static int dyntick_save_progress_counter(struct rcu_data *rdp)
797 { 797 {
798 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 798 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
799 return (rdp->dynticks_snap & 0x1) == 0; 799 return (rdp->dynticks_snap & 0x1) == 0;
800 } 800 }
801 801
802 /* 802 /*
803 * Return true if the specified CPU has passed through a quiescent 803 * Return true if the specified CPU has passed through a quiescent
804 * state by virtue of being in or having passed through a dynticks 804 * state by virtue of being in or having passed through a dynticks
805 * idle state since the last call to dyntick_save_progress_counter() 805 * idle state since the last call to dyntick_save_progress_counter()
806 * for this same CPU, or by virtue of having been offline. 806 * for this same CPU, or by virtue of having been offline.
807 */ 807 */
808 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 808 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
809 { 809 {
810 unsigned int curr; 810 unsigned int curr;
811 unsigned int snap; 811 unsigned int snap;
812 812
813 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); 813 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
814 snap = (unsigned int)rdp->dynticks_snap; 814 snap = (unsigned int)rdp->dynticks_snap;
815 815
816 /* 816 /*
817 * If the CPU passed through or entered a dynticks idle phase with 817 * If the CPU passed through or entered a dynticks idle phase with
818 * no active irq/NMI handlers, then we can safely pretend that the CPU 818 * no active irq/NMI handlers, then we can safely pretend that the CPU
819 * already acknowledged the request to pass through a quiescent 819 * already acknowledged the request to pass through a quiescent
820 * state. Either way, that CPU cannot possibly be in an RCU 820 * state. Either way, that CPU cannot possibly be in an RCU
821 * read-side critical section that started before the beginning 821 * read-side critical section that started before the beginning
822 * of the current RCU grace period. 822 * of the current RCU grace period.
823 */ 823 */
824 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { 824 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
825 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); 825 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
826 rdp->dynticks_fqs++; 826 rdp->dynticks_fqs++;
827 return 1; 827 return 1;
828 } 828 }
829 829
830 /* 830 /*
831 * Check for the CPU being offline, but only if the grace period 831 * Check for the CPU being offline, but only if the grace period
832 * is old enough. We don't need to worry about the CPU changing 832 * is old enough. We don't need to worry about the CPU changing
833 * state: If we see it offline even once, it has been through a 833 * state: If we see it offline even once, it has been through a
834 * quiescent state. 834 * quiescent state.
835 * 835 *
836 * The reason for insisting that the grace period be at least 836 * The reason for insisting that the grace period be at least
837 * one jiffy old is that CPUs that are not quite online, or that 837 * one jiffy old is that CPUs that are not quite online, or that
838 * have just gone offline, can still execute RCU read-side critical 838 * have just gone offline, can still execute RCU read-side critical
839 * sections. 839 * sections.
840 */ 840 */
841 if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies)) 841 if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
842 return 0; /* Grace period is not old enough. */ 842 return 0; /* Grace period is not old enough. */
843 barrier(); 843 barrier();
844 if (cpu_is_offline(rdp->cpu)) { 844 if (cpu_is_offline(rdp->cpu)) {
845 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); 845 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
846 rdp->offline_fqs++; 846 rdp->offline_fqs++;
847 return 1; 847 return 1;
848 } 848 }
849 return 0; 849 return 0;
850 } 850 }
851 851
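The quiescent-state test above combines a parity check (an even counter means the CPU is idle right now) with a wraparound-safe distance check (the counter advanced by at least two, so the CPU passed through idle at some point). A standalone sketch of that test, assuming UINT_CMP_GE() has its usual unsigned-distance definition, roughly UINT_MAX / 2 >= (a) - (b):

/*
 * Standalone illustration of the dynticks test used above; snap is the
 * value saved by dyntick_save_progress_counter(), curr the value read
 * later by rcu_implicit_dynticks_qs().
 */
static int dynticks_idle_seen(unsigned int snap, unsigned int curr)
{
	if ((curr & 0x1) == 0)			/* Even: idle right now. */
		return 1;
	if (UINT_CMP_GE(curr, snap + 2))	/* Advanced by >= 2: it   */
		return 1;			/* passed through idle.   */
	return 0;		/* Still within the same non-idle span. */
}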
852 static int jiffies_till_stall_check(void) 852 static int jiffies_till_stall_check(void)
853 { 853 {
854 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); 854 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
855 855
856 /* 856 /*
857 * Limit check must be consistent with the Kconfig limits 857 * Limit check must be consistent with the Kconfig limits
858 * for CONFIG_RCU_CPU_STALL_TIMEOUT. 858 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
859 */ 859 */
860 if (till_stall_check < 3) { 860 if (till_stall_check < 3) {
861 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; 861 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
862 till_stall_check = 3; 862 till_stall_check = 3;
863 } else if (till_stall_check > 300) { 863 } else if (till_stall_check > 300) {
864 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; 864 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
865 till_stall_check = 300; 865 till_stall_check = 300;
866 } 866 }
867 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 867 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
868 } 868 }
869 869
870 static void record_gp_stall_check_time(struct rcu_state *rsp) 870 static void record_gp_stall_check_time(struct rcu_state *rsp)
871 { 871 {
872 rsp->gp_start = jiffies; 872 rsp->gp_start = jiffies;
873 rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); 873 rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
874 } 874 }
875 875
876 static void print_other_cpu_stall(struct rcu_state *rsp) 876 static void print_other_cpu_stall(struct rcu_state *rsp)
877 { 877 {
878 int cpu; 878 int cpu;
879 long delta; 879 long delta;
880 unsigned long flags; 880 unsigned long flags;
881 int ndetected = 0; 881 int ndetected = 0;
882 struct rcu_node *rnp = rcu_get_root(rsp); 882 struct rcu_node *rnp = rcu_get_root(rsp);
883 883
884 /* Only let one CPU complain about others per time interval. */ 884 /* Only let one CPU complain about others per time interval. */
885 885
886 raw_spin_lock_irqsave(&rnp->lock, flags); 886 raw_spin_lock_irqsave(&rnp->lock, flags);
887 delta = jiffies - rsp->jiffies_stall; 887 delta = jiffies - rsp->jiffies_stall;
888 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 888 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
889 raw_spin_unlock_irqrestore(&rnp->lock, flags); 889 raw_spin_unlock_irqrestore(&rnp->lock, flags);
890 return; 890 return;
891 } 891 }
892 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; 892 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
893 raw_spin_unlock_irqrestore(&rnp->lock, flags); 893 raw_spin_unlock_irqrestore(&rnp->lock, flags);
894 894
895 /* 895 /*
896 * OK, time to rat on our buddy... 896 * OK, time to rat on our buddy...
897 * See Documentation/RCU/stallwarn.txt for info on how to debug 897 * See Documentation/RCU/stallwarn.txt for info on how to debug
898 * RCU CPU stall warnings. 898 * RCU CPU stall warnings.
899 */ 899 */
900 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:", 900 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
901 rsp->name); 901 rsp->name);
902 print_cpu_stall_info_begin(); 902 print_cpu_stall_info_begin();
903 rcu_for_each_leaf_node(rsp, rnp) { 903 rcu_for_each_leaf_node(rsp, rnp) {
904 raw_spin_lock_irqsave(&rnp->lock, flags); 904 raw_spin_lock_irqsave(&rnp->lock, flags);
905 ndetected += rcu_print_task_stall(rnp); 905 ndetected += rcu_print_task_stall(rnp);
906 if (rnp->qsmask != 0) { 906 if (rnp->qsmask != 0) {
907 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 907 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
908 if (rnp->qsmask & (1UL << cpu)) { 908 if (rnp->qsmask & (1UL << cpu)) {
909 print_cpu_stall_info(rsp, 909 print_cpu_stall_info(rsp,
910 rnp->grplo + cpu); 910 rnp->grplo + cpu);
911 ndetected++; 911 ndetected++;
912 } 912 }
913 } 913 }
914 raw_spin_unlock_irqrestore(&rnp->lock, flags); 914 raw_spin_unlock_irqrestore(&rnp->lock, flags);
915 } 915 }
916 916
917 /* 917 /*
918 * Now rat on any tasks that got kicked up to the root rcu_node 918 * Now rat on any tasks that got kicked up to the root rcu_node
919 * due to CPU offlining. 919 * due to CPU offlining.
920 */ 920 */
921 rnp = rcu_get_root(rsp); 921 rnp = rcu_get_root(rsp);
922 raw_spin_lock_irqsave(&rnp->lock, flags); 922 raw_spin_lock_irqsave(&rnp->lock, flags);
923 ndetected += rcu_print_task_stall(rnp); 923 ndetected += rcu_print_task_stall(rnp);
924 raw_spin_unlock_irqrestore(&rnp->lock, flags); 924 raw_spin_unlock_irqrestore(&rnp->lock, flags);
925 925
926 print_cpu_stall_info_end(); 926 print_cpu_stall_info_end();
927 printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n", 927 printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
928 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 928 smp_processor_id(), (long)(jiffies - rsp->gp_start));
929 if (ndetected == 0) 929 if (ndetected == 0)
930 printk(KERN_ERR "INFO: Stall ended before state dump start\n"); 930 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
931 else if (!trigger_all_cpu_backtrace()) 931 else if (!trigger_all_cpu_backtrace())
932 dump_stack(); 932 dump_stack();
933 933
934 /* Complain about tasks blocking the grace period. */ 934 /* Complain about tasks blocking the grace period. */
935 935
936 rcu_print_detail_task_stall(rsp); 936 rcu_print_detail_task_stall(rsp);
937 937
938 force_quiescent_state(rsp); /* Kick them all. */ 938 force_quiescent_state(rsp); /* Kick them all. */
939 } 939 }
940 940
941 static void print_cpu_stall(struct rcu_state *rsp) 941 static void print_cpu_stall(struct rcu_state *rsp)
942 { 942 {
943 unsigned long flags; 943 unsigned long flags;
944 struct rcu_node *rnp = rcu_get_root(rsp); 944 struct rcu_node *rnp = rcu_get_root(rsp);
945 945
946 /* 946 /*
947 * OK, time to rat on ourselves... 947 * OK, time to rat on ourselves...
948 * See Documentation/RCU/stallwarn.txt for info on how to debug 948 * See Documentation/RCU/stallwarn.txt for info on how to debug
949 * RCU CPU stall warnings. 949 * RCU CPU stall warnings.
950 */ 950 */
951 printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name); 951 printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
952 print_cpu_stall_info_begin(); 952 print_cpu_stall_info_begin();
953 print_cpu_stall_info(rsp, smp_processor_id()); 953 print_cpu_stall_info(rsp, smp_processor_id());
954 print_cpu_stall_info_end(); 954 print_cpu_stall_info_end();
955 printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start); 955 printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
956 if (!trigger_all_cpu_backtrace()) 956 if (!trigger_all_cpu_backtrace())
957 dump_stack(); 957 dump_stack();
958 958
959 raw_spin_lock_irqsave(&rnp->lock, flags); 959 raw_spin_lock_irqsave(&rnp->lock, flags);
960 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 960 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
961 rsp->jiffies_stall = jiffies + 961 rsp->jiffies_stall = jiffies +
962 3 * jiffies_till_stall_check() + 3; 962 3 * jiffies_till_stall_check() + 3;
963 raw_spin_unlock_irqrestore(&rnp->lock, flags); 963 raw_spin_unlock_irqrestore(&rnp->lock, flags);
964 964
965 set_need_resched(); /* kick ourselves to get things going. */ 965 set_need_resched(); /* kick ourselves to get things going. */
966 } 966 }
967 967
968 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 968 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
969 { 969 {
970 unsigned long j; 970 unsigned long j;
971 unsigned long js; 971 unsigned long js;
972 struct rcu_node *rnp; 972 struct rcu_node *rnp;
973 973
974 if (rcu_cpu_stall_suppress) 974 if (rcu_cpu_stall_suppress)
975 return; 975 return;
976 j = ACCESS_ONCE(jiffies); 976 j = ACCESS_ONCE(jiffies);
977 js = ACCESS_ONCE(rsp->jiffies_stall); 977 js = ACCESS_ONCE(rsp->jiffies_stall);
978 rnp = rdp->mynode; 978 rnp = rdp->mynode;
979 if (rcu_gp_in_progress(rsp) && 979 if (rcu_gp_in_progress(rsp) &&
980 (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { 980 (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
981 981
982 /* We haven't checked in, so go dump stack. */ 982 /* We haven't checked in, so go dump stack. */
983 print_cpu_stall(rsp); 983 print_cpu_stall(rsp);
984 984
985 } else if (rcu_gp_in_progress(rsp) && 985 } else if (rcu_gp_in_progress(rsp) &&
986 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 986 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
987 987
988 /* They had a few time units to dump stack, so complain. */ 988 /* They had a few time units to dump stack, so complain. */
989 print_other_cpu_stall(rsp); 989 print_other_cpu_stall(rsp);
990 } 990 }
991 } 991 }
992 992
993 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 993 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
994 { 994 {
995 rcu_cpu_stall_suppress = 1; 995 rcu_cpu_stall_suppress = 1;
996 return NOTIFY_DONE; 996 return NOTIFY_DONE;
997 } 997 }
998 998
999 /** 999 /**
1000 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 1000 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
1001 * 1001 *
1002 * Set the stall-warning timeout way off into the future, thus preventing 1002 * Set the stall-warning timeout way off into the future, thus preventing
1003 * any RCU CPU stall-warning messages from appearing in the current set of 1003 * any RCU CPU stall-warning messages from appearing in the current set of
1004 * RCU grace periods. 1004 * RCU grace periods.
1005 * 1005 *
1006 * The caller must disable hard irqs. 1006 * The caller must disable hard irqs.
1007 */ 1007 */
1008 void rcu_cpu_stall_reset(void) 1008 void rcu_cpu_stall_reset(void)
1009 { 1009 {
1010 struct rcu_state *rsp; 1010 struct rcu_state *rsp;
1011 1011
1012 for_each_rcu_flavor(rsp) 1012 for_each_rcu_flavor(rsp)
1013 rsp->jiffies_stall = jiffies + ULONG_MAX / 2; 1013 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
1014 } 1014 }
1015 1015
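Setting ->jiffies_stall to jiffies + ULONG_MAX / 2 works because every stall-time comparison goes through ULONG_CMP_GE()/ULONG_CMP_LT(), which treat the unsigned difference as a signed distance. A sketch of why that value means "effectively never", under the usual assumption that ULONG_CMP_GE(a, b) is roughly ULONG_MAX / 2 >= (a) - (b):

/*
 * Sketch only: the shape of the comparison that check_cpu_stall()
 * performs against ->jiffies_stall.
 */
static bool stall_timeout_reached(unsigned long now, unsigned long js)
{
	/*
	 * With js = now + ULONG_MAX / 2, the difference now - js is
	 * just over ULONG_MAX / 2, so this stays false for roughly
	 * half the counter's period, far longer than any grace period.
	 */
	return ULONG_CMP_GE(now, js);
}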
1016 static struct notifier_block rcu_panic_block = { 1016 static struct notifier_block rcu_panic_block = {
1017 .notifier_call = rcu_panic, 1017 .notifier_call = rcu_panic,
1018 }; 1018 };
1019 1019
1020 static void __init check_cpu_stall_init(void) 1020 static void __init check_cpu_stall_init(void)
1021 { 1021 {
1022 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 1022 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
1023 } 1023 }
1024 1024
1025 /* 1025 /*
1026 * Update CPU-local rcu_data state to record the newly noticed grace period. 1026 * Update CPU-local rcu_data state to record the newly noticed grace period.
1027 * This is used both when we started the grace period and when we notice 1027 * This is used both when we started the grace period and when we notice
1028 * that someone else started the grace period. The caller must hold the 1028 * that someone else started the grace period. The caller must hold the
1029 * ->lock of the leaf rcu_node structure corresponding to the current CPU, 1029 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
1030 * and must have irqs disabled. 1030 * and must have irqs disabled.
1031 */ 1031 */
1032 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1032 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1033 { 1033 {
1034 if (rdp->gpnum != rnp->gpnum) { 1034 if (rdp->gpnum != rnp->gpnum) {
1035 /* 1035 /*
1036 * If the current grace period is waiting for this CPU, 1036 * If the current grace period is waiting for this CPU,
1037 * set up to detect a quiescent state, otherwise don't 1037 * set up to detect a quiescent state, otherwise don't
1038 * go looking for one. 1038 * go looking for one.
1039 */ 1039 */
1040 rdp->gpnum = rnp->gpnum; 1040 rdp->gpnum = rnp->gpnum;
1041 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); 1041 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
1042 rdp->passed_quiesce = 0; 1042 rdp->passed_quiesce = 0;
1043 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); 1043 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1044 zero_cpu_stall_ticks(rdp); 1044 zero_cpu_stall_ticks(rdp);
1045 } 1045 }
1046 } 1046 }
1047 1047
1048 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) 1048 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
1049 { 1049 {
1050 unsigned long flags; 1050 unsigned long flags;
1051 struct rcu_node *rnp; 1051 struct rcu_node *rnp;
1052 1052
1053 local_irq_save(flags); 1053 local_irq_save(flags);
1054 rnp = rdp->mynode; 1054 rnp = rdp->mynode;
1055 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ 1055 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
1056 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1056 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1057 local_irq_restore(flags); 1057 local_irq_restore(flags);
1058 return; 1058 return;
1059 } 1059 }
1060 __note_new_gpnum(rsp, rnp, rdp); 1060 __note_new_gpnum(rsp, rnp, rdp);
1061 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1061 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1062 } 1062 }
1063 1063
1064 /* 1064 /*
1065 * Did someone else start a new RCU grace period since we last 1065 * Did someone else start a new RCU grace period since we last
1066 * checked? Update local state appropriately if so. Must be called 1066 * checked? Update local state appropriately if so. Must be called
1067 * on the CPU corresponding to rdp. 1067 * on the CPU corresponding to rdp.
1068 */ 1068 */
1069 static int 1069 static int
1070 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) 1070 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
1071 { 1071 {
1072 unsigned long flags; 1072 unsigned long flags;
1073 int ret = 0; 1073 int ret = 0;
1074 1074
1075 local_irq_save(flags); 1075 local_irq_save(flags);
1076 if (rdp->gpnum != rsp->gpnum) { 1076 if (rdp->gpnum != rsp->gpnum) {
1077 note_new_gpnum(rsp, rdp); 1077 note_new_gpnum(rsp, rdp);
1078 ret = 1; 1078 ret = 1;
1079 } 1079 }
1080 local_irq_restore(flags); 1080 local_irq_restore(flags);
1081 return ret; 1081 return ret;
1082 } 1082 }
1083 1083
1084 /* 1084 /*
1085 * Initialize the specified rcu_data structure's callback list to empty. 1085 * Initialize the specified rcu_data structure's callback list to empty.
1086 */ 1086 */
1087 static void init_callback_list(struct rcu_data *rdp) 1087 static void init_callback_list(struct rcu_data *rdp)
1088 { 1088 {
1089 int i; 1089 int i;
1090 1090
1091 rdp->nxtlist = NULL; 1091 rdp->nxtlist = NULL;
1092 for (i = 0; i < RCU_NEXT_SIZE; i++) 1092 for (i = 0; i < RCU_NEXT_SIZE; i++)
1093 rdp->nxttail[i] = &rdp->nxtlist; 1093 rdp->nxttail[i] = &rdp->nxtlist;
1094 } 1094 }
1095 1095
1096 /* 1096 /*
1097 * Advance this CPU's callbacks, but only if the current grace period 1097 * Advance this CPU's callbacks, but only if the current grace period
1098 * has ended. This may be called only from the CPU to whom the rdp 1098 * has ended. This may be called only from the CPU to whom the rdp
1099 * belongs. In addition, the corresponding leaf rcu_node structure's 1099 * belongs. In addition, the corresponding leaf rcu_node structure's
1100 * ->lock must be held by the caller, with irqs disabled. 1100 * ->lock must be held by the caller, with irqs disabled.
1101 */ 1101 */
1102 static void 1102 static void
1103 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1103 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1104 { 1104 {
1105 /* Did another grace period end? */ 1105 /* Did another grace period end? */
1106 if (rdp->completed != rnp->completed) { 1106 if (rdp->completed != rnp->completed) {
1107 1107
1108 /* Advance callbacks. No harm if list empty. */ 1108 /* Advance callbacks. No harm if list empty. */
1109 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 1109 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
1110 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 1110 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
1111 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1111 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1112 1112
1113 /* Remember that we saw this grace-period completion. */ 1113 /* Remember that we saw this grace-period completion. */
1114 rdp->completed = rnp->completed; 1114 rdp->completed = rnp->completed;
1115 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); 1115 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
1116 1116
1117 /* 1117 /*
1118 * If we were in an extended quiescent state, we may have 1118 * If we were in an extended quiescent state, we may have
1119 * missed some grace periods that other CPUs handled on 1119 * missed some grace periods that other CPUs handled on
1120 * our behalf. Catch up with this state to avoid noting 1120 * our behalf. Catch up with this state to avoid noting
1121 * spurious new grace periods. If another grace period 1121 * spurious new grace periods. If another grace period
1122 * has started, then rnp->gpnum will have advanced, so 1122 * has started, then rnp->gpnum will have advanced, so
1123 * we will detect this later on. Of course, any quiescent 1123 * we will detect this later on. Of course, any quiescent
1124 * states we found for the old GP are now invalid. 1124 * states we found for the old GP are now invalid.
1125 */ 1125 */
1126 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) { 1126 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) {
1127 rdp->gpnum = rdp->completed; 1127 rdp->gpnum = rdp->completed;
1128 rdp->passed_quiesce = 0; 1128 rdp->passed_quiesce = 0;
1129 } 1129 }
1130 1130
1131 /* 1131 /*
1132 * If RCU does not need a quiescent state from this CPU, 1132 * If RCU does not need a quiescent state from this CPU,
1133 * then make sure that this CPU doesn't go looking for one. 1133 * then make sure that this CPU doesn't go looking for one.
1134 */ 1134 */
1135 if ((rnp->qsmask & rdp->grpmask) == 0) 1135 if ((rnp->qsmask & rdp->grpmask) == 0)
1136 rdp->qs_pending = 0; 1136 rdp->qs_pending = 0;
1137 } 1137 }
1138 } 1138 }
1139 1139
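The tail rotation above relies on the segmented layout of the per-CPU callback list: ->nxtlist is a single singly linked list, and ->nxttail[seg] points at the ->next field that ends segment seg, so an empty segment's tail equals the previous segment's tail and a fully empty list has every tail pointing at &->nxtlist. A sketch that walks only the RCU_DONE_TAIL segment to make that invariant concrete (count_done_callbacks() is illustrative, not from this file):

/*
 * Illustration only: count the callbacks in the "done" segment by
 * walking from the list head to the pointer that RCU_DONE_TAIL marks.
 */
static long count_done_callbacks(struct rcu_data *rdp)
{
	struct rcu_head **hpp = &rdp->nxtlist;
	long n = 0;

	while (hpp != rdp->nxttail[RCU_DONE_TAIL]) {
		n++;			/* *hpp is a done callback.      */
		hpp = &(*hpp)->next;	/* Step to its ->next pointer.   */
	}
	return n;
}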
1140 /* 1140 /*
1141 * Advance this CPU's callbacks, but only if the current grace period 1141 * Advance this CPU's callbacks, but only if the current grace period
1142 * has ended. This may be called only from the CPU to whom the rdp 1142 * has ended. This may be called only from the CPU to whom the rdp
1143 * belongs. 1143 * belongs.
1144 */ 1144 */
1145 static void 1145 static void
1146 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) 1146 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
1147 { 1147 {
1148 unsigned long flags; 1148 unsigned long flags;
1149 struct rcu_node *rnp; 1149 struct rcu_node *rnp;
1150 1150
1151 local_irq_save(flags); 1151 local_irq_save(flags);
1152 rnp = rdp->mynode; 1152 rnp = rdp->mynode;
1153 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ 1153 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
1154 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 1154 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
1155 local_irq_restore(flags); 1155 local_irq_restore(flags);
1156 return; 1156 return;
1157 } 1157 }
1158 __rcu_process_gp_end(rsp, rnp, rdp); 1158 __rcu_process_gp_end(rsp, rnp, rdp);
1159 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1159 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1160 } 1160 }
1161 1161
1162 /* 1162 /*
1163 * Do per-CPU grace-period initialization for the running CPU. The caller 1163 * Do per-CPU grace-period initialization for the running CPU. The caller
1164 * must hold the lock of the leaf rcu_node structure corresponding to 1164 * must hold the lock of the leaf rcu_node structure corresponding to
1165 * this CPU. 1165 * this CPU.
1166 */ 1166 */
1167 static void 1167 static void
1168 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 1168 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1169 { 1169 {
1170 /* Prior grace period ended, so advance callbacks for current CPU. */ 1170 /* Prior grace period ended, so advance callbacks for current CPU. */
1171 __rcu_process_gp_end(rsp, rnp, rdp); 1171 __rcu_process_gp_end(rsp, rnp, rdp);
1172 1172
1173 /* Set state so that this CPU will detect the next quiescent state. */ 1173 /* Set state so that this CPU will detect the next quiescent state. */
1174 __note_new_gpnum(rsp, rnp, rdp); 1174 __note_new_gpnum(rsp, rnp, rdp);
1175 } 1175 }
1176 1176
1177 /* 1177 /*
1178 * Initialize a new grace period. 1178 * Initialize a new grace period.
1179 */ 1179 */
1180 static int rcu_gp_init(struct rcu_state *rsp) 1180 static int rcu_gp_init(struct rcu_state *rsp)
1181 { 1181 {
1182 struct rcu_data *rdp; 1182 struct rcu_data *rdp;
1183 struct rcu_node *rnp = rcu_get_root(rsp); 1183 struct rcu_node *rnp = rcu_get_root(rsp);
1184 1184
1185 raw_spin_lock_irq(&rnp->lock); 1185 raw_spin_lock_irq(&rnp->lock);
1186 rsp->gp_flags = 0; /* Clear all flags: New grace period. */ 1186 rsp->gp_flags = 0; /* Clear all flags: New grace period. */
1187 1187
1188 if (rcu_gp_in_progress(rsp)) { 1188 if (rcu_gp_in_progress(rsp)) {
1189 /* Grace period already in progress, don't start another. */ 1189 /* Grace period already in progress, don't start another. */
1190 raw_spin_unlock_irq(&rnp->lock); 1190 raw_spin_unlock_irq(&rnp->lock);
1191 return 0; 1191 return 0;
1192 } 1192 }
1193 1193
1194 /* Advance to a new grace period and initialize state. */ 1194 /* Advance to a new grace period and initialize state. */
1195 rsp->gpnum++; 1195 rsp->gpnum++;
1196 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); 1196 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
1197 record_gp_stall_check_time(rsp); 1197 record_gp_stall_check_time(rsp);
1198 raw_spin_unlock_irq(&rnp->lock); 1198 raw_spin_unlock_irq(&rnp->lock);
1199 1199
1200 /* Exclude any concurrent CPU-hotplug operations. */ 1200 /* Exclude any concurrent CPU-hotplug operations. */
1201 mutex_lock(&rsp->onoff_mutex); 1201 mutex_lock(&rsp->onoff_mutex);
1202 1202
1203 /* 1203 /*
1204 * Set the quiescent-state-needed bits in all the rcu_node 1204 * Set the quiescent-state-needed bits in all the rcu_node
1205 * structures for all currently online CPUs in breadth-first order, 1205 * structures for all currently online CPUs in breadth-first order,
1206 * starting from the root rcu_node structure, relying on the layout 1206 * starting from the root rcu_node structure, relying on the layout
1207 * of the tree within the rsp->node[] array. Note that other CPUs 1207 * of the tree within the rsp->node[] array. Note that other CPUs
1208 * will access only the leaves of the hierarchy, thus seeing that no 1208 * will access only the leaves of the hierarchy, thus seeing that no
1209 * grace period is in progress, at least until the corresponding 1209 * grace period is in progress, at least until the corresponding
1210 * leaf node has been initialized. In addition, we have excluded 1210 * leaf node has been initialized. In addition, we have excluded
1211 * CPU-hotplug operations. 1211 * CPU-hotplug operations.
1212 * 1212 *
1213 * The grace period cannot complete until the initialization 1213 * The grace period cannot complete until the initialization
1214 * process finishes, because this kthread handles both. 1214 * process finishes, because this kthread handles both.
1215 */ 1215 */
1216 rcu_for_each_node_breadth_first(rsp, rnp) { 1216 rcu_for_each_node_breadth_first(rsp, rnp) {
1217 raw_spin_lock_irq(&rnp->lock); 1217 raw_spin_lock_irq(&rnp->lock);
1218 rdp = this_cpu_ptr(rsp->rda); 1218 rdp = this_cpu_ptr(rsp->rda);
1219 rcu_preempt_check_blocked_tasks(rnp); 1219 rcu_preempt_check_blocked_tasks(rnp);
1220 rnp->qsmask = rnp->qsmaskinit; 1220 rnp->qsmask = rnp->qsmaskinit;
1221 rnp->gpnum = rsp->gpnum; 1221 rnp->gpnum = rsp->gpnum;
1222 WARN_ON_ONCE(rnp->completed != rsp->completed); 1222 WARN_ON_ONCE(rnp->completed != rsp->completed);
1223 rnp->completed = rsp->completed; 1223 rnp->completed = rsp->completed;
1224 if (rnp == rdp->mynode) 1224 if (rnp == rdp->mynode)
1225 rcu_start_gp_per_cpu(rsp, rnp, rdp); 1225 rcu_start_gp_per_cpu(rsp, rnp, rdp);
1226 rcu_preempt_boost_start_gp(rnp); 1226 rcu_preempt_boost_start_gp(rnp);
1227 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1227 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1228 rnp->level, rnp->grplo, 1228 rnp->level, rnp->grplo,
1229 rnp->grphi, rnp->qsmask); 1229 rnp->grphi, rnp->qsmask);
1230 raw_spin_unlock_irq(&rnp->lock); 1230 raw_spin_unlock_irq(&rnp->lock);
1231 #ifdef CONFIG_PROVE_RCU_DELAY 1231 #ifdef CONFIG_PROVE_RCU_DELAY
1232 if ((random32() % (rcu_num_nodes * 8)) == 0) 1232 if ((random32() % (rcu_num_nodes * 8)) == 0)
1233 schedule_timeout_uninterruptible(2); 1233 schedule_timeout_uninterruptible(2);
1234 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ 1234 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
1235 cond_resched(); 1235 cond_resched();
1236 } 1236 }
1237 1237
1238 mutex_unlock(&rsp->onoff_mutex); 1238 mutex_unlock(&rsp->onoff_mutex);
1239 return 1; 1239 return 1;
1240 } 1240 }
1241 1241
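The breadth-first walk in rcu_gp_init() depends on the rcu_node tree being stored level by level in rsp->node[], so a plain index walk visits the root before any of its children. The iterator is, to a first approximation, nothing more than the bounded pointer walk sketched below (the real rcu_for_each_node_breadth_first() may differ in detail):

/*
 * Sketch of a breadth-first rcu_node iterator, assuming the level-by-
 * level array layout described in the comment above.
 */
#define sketch_for_each_node_breadth_first(rsp, rnp) \
	for ((rnp) = &(rsp)->node[0]; \
	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)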
1242 /* 1242 /*
1243 * Do one round of quiescent-state forcing. 1243 * Do one round of quiescent-state forcing.
1244 */ 1244 */
1245 int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) 1245 int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1246 { 1246 {
1247 int fqs_state = fqs_state_in; 1247 int fqs_state = fqs_state_in;
1248 struct rcu_node *rnp = rcu_get_root(rsp); 1248 struct rcu_node *rnp = rcu_get_root(rsp);
1249 1249
1250 rsp->n_force_qs++; 1250 rsp->n_force_qs++;
1251 if (fqs_state == RCU_SAVE_DYNTICK) { 1251 if (fqs_state == RCU_SAVE_DYNTICK) {
1252 /* Collect dyntick-idle snapshots. */ 1252 /* Collect dyntick-idle snapshots. */
1253 force_qs_rnp(rsp, dyntick_save_progress_counter); 1253 force_qs_rnp(rsp, dyntick_save_progress_counter);
1254 fqs_state = RCU_FORCE_QS; 1254 fqs_state = RCU_FORCE_QS;
1255 } else { 1255 } else {
1256 /* Handle dyntick-idle and offline CPUs. */ 1256 /* Handle dyntick-idle and offline CPUs. */
1257 force_qs_rnp(rsp, rcu_implicit_dynticks_qs); 1257 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
1258 } 1258 }
1259 /* Clear flag to prevent immediate re-entry. */ 1259 /* Clear flag to prevent immediate re-entry. */
1260 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1260 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1261 raw_spin_lock_irq(&rnp->lock); 1261 raw_spin_lock_irq(&rnp->lock);
1262 rsp->gp_flags &= ~RCU_GP_FLAG_FQS; 1262 rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
1263 raw_spin_unlock_irq(&rnp->lock); 1263 raw_spin_unlock_irq(&rnp->lock);
1264 } 1264 }
1265 return fqs_state; 1265 return fqs_state;
1266 } 1266 }
1267 1267
1268 /* 1268 /*
1269 * Clean up after the old grace period. 1269 * Clean up after the old grace period.
1270 */ 1270 */
1271 static void rcu_gp_cleanup(struct rcu_state *rsp) 1271 static void rcu_gp_cleanup(struct rcu_state *rsp)
1272 { 1272 {
1273 unsigned long gp_duration; 1273 unsigned long gp_duration;
1274 struct rcu_data *rdp; 1274 struct rcu_data *rdp;
1275 struct rcu_node *rnp = rcu_get_root(rsp); 1275 struct rcu_node *rnp = rcu_get_root(rsp);
1276 1276
1277 raw_spin_lock_irq(&rnp->lock); 1277 raw_spin_lock_irq(&rnp->lock);
1278 gp_duration = jiffies - rsp->gp_start; 1278 gp_duration = jiffies - rsp->gp_start;
1279 if (gp_duration > rsp->gp_max) 1279 if (gp_duration > rsp->gp_max)
1280 rsp->gp_max = gp_duration; 1280 rsp->gp_max = gp_duration;
1281 1281
1282 /* 1282 /*
1283 * We know the grace period is complete, but to everyone else 1283 * We know the grace period is complete, but to everyone else
1284 * it appears to still be ongoing. But it is also the case 1284 * it appears to still be ongoing. But it is also the case
1285 * that to everyone else it looks like there is nothing that 1285 * that to everyone else it looks like there is nothing that
1286 * they can do to advance the grace period. It is therefore 1286 * they can do to advance the grace period. It is therefore
1287 * safe for us to drop the lock in order to mark the grace 1287 * safe for us to drop the lock in order to mark the grace
1288 * period as completed in all of the rcu_node structures. 1288 * period as completed in all of the rcu_node structures.
1289 */ 1289 */
1290 raw_spin_unlock_irq(&rnp->lock); 1290 raw_spin_unlock_irq(&rnp->lock);
1291 1291
1292 /* 1292 /*
1293 * Propagate new ->completed value to rcu_node structures so 1293 * Propagate new ->completed value to rcu_node structures so
1294 * that other CPUs don't have to wait until the start of the next 1294 * that other CPUs don't have to wait until the start of the next
1295 * grace period to process their callbacks. This also avoids 1295 * grace period to process their callbacks. This also avoids
1296 * some nasty RCU grace-period initialization races by forcing 1296 * some nasty RCU grace-period initialization races by forcing
1297 * the end of the current grace period to be completely recorded in 1297 * the end of the current grace period to be completely recorded in
1298 * all of the rcu_node structures before the beginning of the next 1298 * all of the rcu_node structures before the beginning of the next
1299 * grace period is recorded in any of the rcu_node structures. 1299 * grace period is recorded in any of the rcu_node structures.
1300 */ 1300 */
1301 rcu_for_each_node_breadth_first(rsp, rnp) { 1301 rcu_for_each_node_breadth_first(rsp, rnp) {
1302 raw_spin_lock_irq(&rnp->lock); 1302 raw_spin_lock_irq(&rnp->lock);
1303 rnp->completed = rsp->gpnum; 1303 rnp->completed = rsp->gpnum;
1304 raw_spin_unlock_irq(&rnp->lock); 1304 raw_spin_unlock_irq(&rnp->lock);
1305 cond_resched(); 1305 cond_resched();
1306 } 1306 }
1307 rnp = rcu_get_root(rsp); 1307 rnp = rcu_get_root(rsp);
1308 raw_spin_lock_irq(&rnp->lock); 1308 raw_spin_lock_irq(&rnp->lock);
1309 1309
1310 rsp->completed = rsp->gpnum; /* Declare grace period done. */ 1310 rsp->completed = rsp->gpnum; /* Declare grace period done. */
1311 trace_rcu_grace_period(rsp->name, rsp->completed, "end"); 1311 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
1312 rsp->fqs_state = RCU_GP_IDLE; 1312 rsp->fqs_state = RCU_GP_IDLE;
1313 rdp = this_cpu_ptr(rsp->rda); 1313 rdp = this_cpu_ptr(rsp->rda);
1314 if (cpu_needs_another_gp(rsp, rdp)) 1314 if (cpu_needs_another_gp(rsp, rdp))
1315 rsp->gp_flags = 1; 1315 rsp->gp_flags = 1;
1316 raw_spin_unlock_irq(&rnp->lock); 1316 raw_spin_unlock_irq(&rnp->lock);
1317 } 1317 }
1318 1318
1319 /* 1319 /*
1320 * Body of kthread that handles grace periods. 1320 * Body of kthread that handles grace periods.
1321 */ 1321 */
1322 static int __noreturn rcu_gp_kthread(void *arg) 1322 static int __noreturn rcu_gp_kthread(void *arg)
1323 { 1323 {
1324 int fqs_state; 1324 int fqs_state;
1325 unsigned long j; 1325 unsigned long j;
1326 int ret; 1326 int ret;
1327 struct rcu_state *rsp = arg; 1327 struct rcu_state *rsp = arg;
1328 struct rcu_node *rnp = rcu_get_root(rsp); 1328 struct rcu_node *rnp = rcu_get_root(rsp);
1329 1329
1330 for (;;) { 1330 for (;;) {
1331 1331
1332 /* Handle grace-period start. */ 1332 /* Handle grace-period start. */
1333 for (;;) { 1333 for (;;) {
1334 wait_event_interruptible(rsp->gp_wq, 1334 wait_event_interruptible(rsp->gp_wq,
1335 rsp->gp_flags & 1335 rsp->gp_flags &
1336 RCU_GP_FLAG_INIT); 1336 RCU_GP_FLAG_INIT);
1337 if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && 1337 if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
1338 rcu_gp_init(rsp)) 1338 rcu_gp_init(rsp))
1339 break; 1339 break;
1340 cond_resched(); 1340 cond_resched();
1341 flush_signals(current); 1341 flush_signals(current);
1342 } 1342 }
1343 1343
1344 /* Handle quiescent-state forcing. */ 1344 /* Handle quiescent-state forcing. */
1345 fqs_state = RCU_SAVE_DYNTICK; 1345 fqs_state = RCU_SAVE_DYNTICK;
1346 j = jiffies_till_first_fqs; 1346 j = jiffies_till_first_fqs;
1347 if (j > HZ) { 1347 if (j > HZ) {
1348 j = HZ; 1348 j = HZ;
1349 jiffies_till_first_fqs = HZ; 1349 jiffies_till_first_fqs = HZ;
1350 } 1350 }
1351 for (;;) { 1351 for (;;) {
1352 rsp->jiffies_force_qs = jiffies + j; 1352 rsp->jiffies_force_qs = jiffies + j;
1353 ret = wait_event_interruptible_timeout(rsp->gp_wq, 1353 ret = wait_event_interruptible_timeout(rsp->gp_wq,
1354 (rsp->gp_flags & RCU_GP_FLAG_FQS) || 1354 (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
1355 (!ACCESS_ONCE(rnp->qsmask) && 1355 (!ACCESS_ONCE(rnp->qsmask) &&
1356 !rcu_preempt_blocked_readers_cgp(rnp)), 1356 !rcu_preempt_blocked_readers_cgp(rnp)),
1357 j); 1357 j);
1358 /* If grace period done, leave loop. */ 1358 /* If grace period done, leave loop. */
1359 if (!ACCESS_ONCE(rnp->qsmask) && 1359 if (!ACCESS_ONCE(rnp->qsmask) &&
1360 !rcu_preempt_blocked_readers_cgp(rnp)) 1360 !rcu_preempt_blocked_readers_cgp(rnp))
1361 break; 1361 break;
1362 /* If time for quiescent-state forcing, do it. */ 1362 /* If time for quiescent-state forcing, do it. */
1363 if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { 1363 if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) {
1364 fqs_state = rcu_gp_fqs(rsp, fqs_state); 1364 fqs_state = rcu_gp_fqs(rsp, fqs_state);
1365 cond_resched(); 1365 cond_resched();
1366 } else { 1366 } else {
1367 /* Deal with stray signal. */ 1367 /* Deal with stray signal. */
1368 cond_resched(); 1368 cond_resched();
1369 flush_signals(current); 1369 flush_signals(current);
1370 } 1370 }
1371 j = jiffies_till_next_fqs; 1371 j = jiffies_till_next_fqs;
1372 if (j > HZ) { 1372 if (j > HZ) {
1373 j = HZ; 1373 j = HZ;
1374 jiffies_till_next_fqs = HZ; 1374 jiffies_till_next_fqs = HZ;
1375 } else if (j < 1) { 1375 } else if (j < 1) {
1376 j = 1; 1376 j = 1;
1377 jiffies_till_next_fqs = 1; 1377 jiffies_till_next_fqs = 1;
1378 } 1378 }
1379 } 1379 }
1380 1380
1381 /* Handle grace-period end. */ 1381 /* Handle grace-period end. */
1382 rcu_gp_cleanup(rsp); 1382 rcu_gp_cleanup(rsp);
1383 } 1383 }
1384 } 1384 }
1385 1385
1386 /* 1386 /*
1387 * Start a new RCU grace period if warranted, re-initializing the hierarchy 1387 * Start a new RCU grace period if warranted, re-initializing the hierarchy
1388 * in preparation for detecting the next grace period. The caller must hold 1388 * in preparation for detecting the next grace period. The caller must hold
1389 * the root node's ->lock, which is released before return. Hard irqs must 1389 * the root node's ->lock, which is released before return. Hard irqs must
1390 * be disabled. 1390 * be disabled.
1391 * 1391 *
1392 * Note that it is legal for a dying CPU (which is marked as offline) to 1392 * Note that it is legal for a dying CPU (which is marked as offline) to
1393 * invoke this function. This can happen when the dying CPU reports its 1393 * invoke this function. This can happen when the dying CPU reports its
1394 * quiescent state. 1394 * quiescent state.
1395 */ 1395 */
1396 static void 1396 static void
1397 rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 1397 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
1398 __releases(rcu_get_root(rsp)->lock) 1398 __releases(rcu_get_root(rsp)->lock)
1399 { 1399 {
1400 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1400 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1401 struct rcu_node *rnp = rcu_get_root(rsp); 1401 struct rcu_node *rnp = rcu_get_root(rsp);
1402 1402
1403 if (!rsp->gp_kthread || 1403 if (!rsp->gp_kthread ||
1404 !cpu_needs_another_gp(rsp, rdp)) { 1404 !cpu_needs_another_gp(rsp, rdp)) {
1405 /* 1405 /*
1406 * Either we have not yet spawned the grace-period 1406 * Either we have not yet spawned the grace-period
1407 * task or this CPU does not need another grace period. 1407 * task or this CPU does not need another grace period.
1408 * Either way, don't start a new grace period. 1408 * Either way, don't start a new grace period.
1409 */ 1409 */
1410 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1410 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1411 return; 1411 return;
1412 } 1412 }
1413 1413
1414 rsp->gp_flags = RCU_GP_FLAG_INIT; 1414 rsp->gp_flags = RCU_GP_FLAG_INIT;
1415 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1415 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1416 wake_up(&rsp->gp_wq); 1416 wake_up(&rsp->gp_wq);
1417 } 1417 }
1418 1418
1419 /* 1419 /*
1420 * Report a full set of quiescent states to the specified rcu_state 1420 * Report a full set of quiescent states to the specified rcu_state
1421 * data structure. This involves cleaning up after the prior grace 1421 * data structure. This involves cleaning up after the prior grace
1422 * period and letting rcu_start_gp() start up the next grace period 1422 * period and letting rcu_start_gp() start up the next grace period
1423 * if one is needed. Note that the caller must hold rnp->lock, as 1423 * if one is needed. Note that the caller must hold rnp->lock, as
1424 * required by rcu_start_gp(), which will release it. 1424 * required by rcu_start_gp(), which will release it.
1425 */ 1425 */
1426 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 1426 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1427 __releases(rcu_get_root(rsp)->lock) 1427 __releases(rcu_get_root(rsp)->lock)
1428 { 1428 {
1429 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1429 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1430 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 1430 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1431 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 1431 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
1432 } 1432 }
1433 1433
1434 /* 1434 /*
1435 * Similar to rcu_report_qs_rdp(), for which it is a helper function. 1435 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1436 * Allows quiescent states for a group of CPUs to be reported at one go 1436 * Allows quiescent states for a group of CPUs to be reported at one go
1437 * to the specified rcu_node structure, though all the CPUs in the group 1437 * to the specified rcu_node structure, though all the CPUs in the group
1438 * must be represented by the same rcu_node structure (which need not be 1438 * must be represented by the same rcu_node structure (which need not be
1439 * a leaf rcu_node structure, though it often will be). That structure's 1439 * a leaf rcu_node structure, though it often will be). That structure's
1440 * lock must be held upon entry, and it is released before return. 1440 * lock must be held upon entry, and it is released before return.
1441 */ 1441 */
1442 static void 1442 static void
1443 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 1443 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1444 struct rcu_node *rnp, unsigned long flags) 1444 struct rcu_node *rnp, unsigned long flags)
1445 __releases(rnp->lock) 1445 __releases(rnp->lock)
1446 { 1446 {
1447 struct rcu_node *rnp_c; 1447 struct rcu_node *rnp_c;
1448 1448
1449 /* Walk up the rcu_node hierarchy. */ 1449 /* Walk up the rcu_node hierarchy. */
1450 for (;;) { 1450 for (;;) {
1451 if (!(rnp->qsmask & mask)) { 1451 if (!(rnp->qsmask & mask)) {
1452 1452
1453 /* Our bit has already been cleared, so done. */ 1453 /* Our bit has already been cleared, so done. */
1454 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1454 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1455 return; 1455 return;
1456 } 1456 }
1457 rnp->qsmask &= ~mask; 1457 rnp->qsmask &= ~mask;
1458 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 1458 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1459 mask, rnp->qsmask, rnp->level, 1459 mask, rnp->qsmask, rnp->level,
1460 rnp->grplo, rnp->grphi, 1460 rnp->grplo, rnp->grphi,
1461 !!rnp->gp_tasks); 1461 !!rnp->gp_tasks);
1462 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 1462 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1463 1463
1464 /* Other bits still set at this level, so done. */ 1464 /* Other bits still set at this level, so done. */
1465 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1465 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1466 return; 1466 return;
1467 } 1467 }
1468 mask = rnp->grpmask; 1468 mask = rnp->grpmask;
1469 if (rnp->parent == NULL) { 1469 if (rnp->parent == NULL) {
1470 1470
1471 /* No more levels. Exit loop holding root lock. */ 1471 /* No more levels. Exit loop holding root lock. */
1472 1472
1473 break; 1473 break;
1474 } 1474 }
1475 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1475 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1476 rnp_c = rnp; 1476 rnp_c = rnp;
1477 rnp = rnp->parent; 1477 rnp = rnp->parent;
1478 raw_spin_lock_irqsave(&rnp->lock, flags); 1478 raw_spin_lock_irqsave(&rnp->lock, flags);
1479 WARN_ON_ONCE(rnp_c->qsmask); 1479 WARN_ON_ONCE(rnp_c->qsmask);
1480 } 1480 }
1481 1481
1482 /* 1482 /*
1483 * Get here if we are the last CPU to pass through a quiescent 1483 * Get here if we are the last CPU to pass through a quiescent
1484 * state for this grace period. Invoke rcu_report_qs_rsp() 1484 * state for this grace period. Invoke rcu_report_qs_rsp()
1485 * to clean up and start the next grace period if one is needed. 1485 * to clean up and start the next grace period if one is needed.
1486 */ 1486 */
1487 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 1487 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
1488 } 1488 }
1489 1489
1490 /* 1490 /*
1491 * Record a quiescent state for the specified CPU to that CPU's rcu_data 1491 * Record a quiescent state for the specified CPU to that CPU's rcu_data
1492 * structure. This must be either called from the specified CPU, or 1492 * structure. This must be either called from the specified CPU, or
1493 * called when the specified CPU is known to be offline (and when it is 1493 * called when the specified CPU is known to be offline (and when it is
1494 * also known that no other CPU is concurrently trying to help the offline 1494 * also known that no other CPU is concurrently trying to help the offline
1495 * CPU). The lastcomp argument is used to make sure we are still in the 1495 * CPU). The lastcomp argument is used to make sure we are still in the
1496 * grace period of interest. We don't want to end the current grace period 1496 * grace period of interest. We don't want to end the current grace period
1497 * based on quiescent states detected in an earlier grace period! 1497 * based on quiescent states detected in an earlier grace period!
1498 */ 1498 */
1499 static void 1499 static void
1500 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) 1500 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1501 { 1501 {
1502 unsigned long flags; 1502 unsigned long flags;
1503 unsigned long mask; 1503 unsigned long mask;
1504 struct rcu_node *rnp; 1504 struct rcu_node *rnp;
1505 1505
1506 rnp = rdp->mynode; 1506 rnp = rdp->mynode;
1507 raw_spin_lock_irqsave(&rnp->lock, flags); 1507 raw_spin_lock_irqsave(&rnp->lock, flags);
1508 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum || 1508 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
1509 rnp->completed == rnp->gpnum) { 1509 rnp->completed == rnp->gpnum) {
1510 1510
1511 /* 1511 /*
1512 * The grace period in which this quiescent state was 1512 * The grace period in which this quiescent state was
1513 * recorded has ended, so don't report it upwards. 1513 * recorded has ended, so don't report it upwards.
1514 * We will instead need a new quiescent state that lies 1514 * We will instead need a new quiescent state that lies
1515 * within the current grace period. 1515 * within the current grace period.
1516 */ 1516 */
1517 rdp->passed_quiesce = 0; /* need qs for new gp. */ 1517 rdp->passed_quiesce = 0; /* need qs for new gp. */
1518 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1518 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1519 return; 1519 return;
1520 } 1520 }
1521 mask = rdp->grpmask; 1521 mask = rdp->grpmask;
1522 if ((rnp->qsmask & mask) == 0) { 1522 if ((rnp->qsmask & mask) == 0) {
1523 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1523 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1524 } else { 1524 } else {
1525 rdp->qs_pending = 0; 1525 rdp->qs_pending = 0;
1526 1526
1527 /* 1527 /*
1528 * This GP can't end until cpu checks in, so all of our 1528 * This GP can't end until cpu checks in, so all of our
1529 * callbacks can be processed during the next GP. 1529 * callbacks can be processed during the next GP.
1530 */ 1530 */
1531 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1531 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1532 1532
1533 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1533 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1534 } 1534 }
1535 } 1535 }
1536 1536
1537 /* 1537 /*
1538 * Check to see if there is a new grace period of which this CPU 1538 * Check to see if there is a new grace period of which this CPU
1539 * is not yet aware, and if so, set up local rcu_data state for it. 1539 * is not yet aware, and if so, set up local rcu_data state for it.
1540 * Otherwise, see if this CPU has just passed through its first 1540 * Otherwise, see if this CPU has just passed through its first
1541 * quiescent state for this grace period, and record that fact if so. 1541 * quiescent state for this grace period, and record that fact if so.
1542 */ 1542 */
1543 static void 1543 static void
1544 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 1544 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1545 { 1545 {
1546 /* If there is now a new grace period, record and return. */ 1546 /* If there is now a new grace period, record and return. */
1547 if (check_for_new_grace_period(rsp, rdp)) 1547 if (check_for_new_grace_period(rsp, rdp))
1548 return; 1548 return;
1549 1549
1550 /* 1550 /*
1551 * Does this CPU still need to do its part for current grace period? 1551 * Does this CPU still need to do its part for current grace period?
1552 * If no, return and let the other CPUs do their part as well. 1552 * If no, return and let the other CPUs do their part as well.
1553 */ 1553 */
1554 if (!rdp->qs_pending) 1554 if (!rdp->qs_pending)
1555 return; 1555 return;
1556 1556
1557 /* 1557 /*
1558 * Was there a quiescent state since the beginning of the grace 1558 * Was there a quiescent state since the beginning of the grace
1559 * period? If no, then exit and wait for the next call. 1559 * period? If no, then exit and wait for the next call.
1560 */ 1560 */
1561 if (!rdp->passed_quiesce) 1561 if (!rdp->passed_quiesce)
1562 return; 1562 return;
1563 1563
1564 /* 1564 /*
1565 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 1565 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1566 * judge of that). 1566 * judge of that).
1567 */ 1567 */
1568 rcu_report_qs_rdp(rdp->cpu, rsp, rdp); 1568 rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
1569 } 1569 }
1570 1570
1571 #ifdef CONFIG_HOTPLUG_CPU 1571 #ifdef CONFIG_HOTPLUG_CPU
1572 1572
1573 /* 1573 /*
1574 * Send the specified CPU's RCU callbacks to the orphanage. The 1574 * Send the specified CPU's RCU callbacks to the orphanage. The
1575 * specified CPU must be offline, and the caller must hold the 1575 * specified CPU must be offline, and the caller must hold the
1576 * ->onofflock. 1576 * ->orphan_lock.
1577 */ 1577 */
1578 static void 1578 static void
1579 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, 1579 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1580 struct rcu_node *rnp, struct rcu_data *rdp) 1580 struct rcu_node *rnp, struct rcu_data *rdp)
1581 { 1581 {
1582 /* 1582 /*
1583 * Orphan the callbacks. First adjust the counts. This is safe 1583 * Orphan the callbacks. First adjust the counts. This is safe
1584 * because _rcu_barrier() excludes CPU-hotplug operations, so it 1584 * because _rcu_barrier() excludes CPU-hotplug operations, so it
1585 * cannot be running now. Thus no memory barrier is required. 1585 * cannot be running now. Thus no memory barrier is required.
1586 */ 1586 */
1587 if (rdp->nxtlist != NULL) { 1587 if (rdp->nxtlist != NULL) {
1588 rsp->qlen_lazy += rdp->qlen_lazy; 1588 rsp->qlen_lazy += rdp->qlen_lazy;
1589 rsp->qlen += rdp->qlen; 1589 rsp->qlen += rdp->qlen;
1590 rdp->n_cbs_orphaned += rdp->qlen; 1590 rdp->n_cbs_orphaned += rdp->qlen;
1591 rdp->qlen_lazy = 0; 1591 rdp->qlen_lazy = 0;
1592 ACCESS_ONCE(rdp->qlen) = 0; 1592 ACCESS_ONCE(rdp->qlen) = 0;
1593 } 1593 }
1594 1594
1595 /* 1595 /*
1596 * Next, move those callbacks still needing a grace period to 1596 * Next, move those callbacks still needing a grace period to
1597 * the orphanage, where some other CPU will pick them up. 1597 * the orphanage, where some other CPU will pick them up.
1598 * Some of the callbacks might have gone partway through a grace 1598 * Some of the callbacks might have gone partway through a grace
1599 * period, but that is too bad. They get to start over because we 1599 * period, but that is too bad. They get to start over because we
1600 * cannot assume that grace periods are synchronized across CPUs. 1600 * cannot assume that grace periods are synchronized across CPUs.
1601 * We don't bother updating the ->nxttail[] array yet, instead 1601 * We don't bother updating the ->nxttail[] array yet, instead
1602 * we just reset the whole thing later on. 1602 * we just reset the whole thing later on.
1603 */ 1603 */
1604 if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { 1604 if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
1605 *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL]; 1605 *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
1606 rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL]; 1606 rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
1607 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1607 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1608 } 1608 }
1609 1609
1610 /* 1610 /*
1611 * Then move the ready-to-invoke callbacks to the orphanage, 1611 * Then move the ready-to-invoke callbacks to the orphanage,
1612 * where some other CPU will pick them up. These will not be 1612 * where some other CPU will pick them up. These will not be
1613 * required to pass through another grace period: They are done. 1613 * required to pass through another grace period: They are done.
1614 */ 1614 */
1615 if (rdp->nxtlist != NULL) { 1615 if (rdp->nxtlist != NULL) {
1616 *rsp->orphan_donetail = rdp->nxtlist; 1616 *rsp->orphan_donetail = rdp->nxtlist;
1617 rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL]; 1617 rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
1618 } 1618 }
1619 1619
1620 /* Finally, initialize the rcu_data structure's list to empty. */ 1620 /* Finally, initialize the rcu_data structure's list to empty. */
1621 init_callback_list(rdp); 1621 init_callback_list(rdp);
1622 } 1622 }
1623 1623
1624 /* 1624 /*
1625 * Adopt the RCU callbacks from the specified rcu_state structure's 1625 * Adopt the RCU callbacks from the specified rcu_state structure's
1626 * orphanage. The caller must hold the ->onofflock. 1626 * orphanage. The caller must hold the ->orphan_lock.
1627 */ 1627 */
1628 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 1628 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1629 { 1629 {
1630 int i; 1630 int i;
1631 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 1631 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
1632 1632
1633 /* Do the accounting first. */ 1633 /* Do the accounting first. */
1634 rdp->qlen_lazy += rsp->qlen_lazy; 1634 rdp->qlen_lazy += rsp->qlen_lazy;
1635 rdp->qlen += rsp->qlen; 1635 rdp->qlen += rsp->qlen;
1636 rdp->n_cbs_adopted += rsp->qlen; 1636 rdp->n_cbs_adopted += rsp->qlen;
1637 if (rsp->qlen_lazy != rsp->qlen) 1637 if (rsp->qlen_lazy != rsp->qlen)
1638 rcu_idle_count_callbacks_posted(); 1638 rcu_idle_count_callbacks_posted();
1639 rsp->qlen_lazy = 0; 1639 rsp->qlen_lazy = 0;
1640 rsp->qlen = 0; 1640 rsp->qlen = 0;
1641 1641
1642 /* 1642 /*
1643 * We do not need a memory barrier here because the only way we 1643 * We do not need a memory barrier here because the only way we
1644 * can get here if there is an rcu_barrier() in flight is if 1644 * can get here if there is an rcu_barrier() in flight is if
1645 * we are the task doing the rcu_barrier(). 1645 * we are the task doing the rcu_barrier().
1646 */ 1646 */
1647 1647
1648 /* First adopt the ready-to-invoke callbacks. */ 1648 /* First adopt the ready-to-invoke callbacks. */
1649 if (rsp->orphan_donelist != NULL) { 1649 if (rsp->orphan_donelist != NULL) {
1650 *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; 1650 *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
1651 *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; 1651 *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
1652 for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) 1652 for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
1653 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) 1653 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1654 rdp->nxttail[i] = rsp->orphan_donetail; 1654 rdp->nxttail[i] = rsp->orphan_donetail;
1655 rsp->orphan_donelist = NULL; 1655 rsp->orphan_donelist = NULL;
1656 rsp->orphan_donetail = &rsp->orphan_donelist; 1656 rsp->orphan_donetail = &rsp->orphan_donelist;
1657 } 1657 }
1658 1658
1659 /* And then adopt the callbacks that still need a grace period. */ 1659 /* And then adopt the callbacks that still need a grace period. */
1660 if (rsp->orphan_nxtlist != NULL) { 1660 if (rsp->orphan_nxtlist != NULL) {
1661 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist; 1661 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
1662 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail; 1662 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
1663 rsp->orphan_nxtlist = NULL; 1663 rsp->orphan_nxtlist = NULL;
1664 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 1664 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
1665 } 1665 }
1666 } 1666 }
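The two orphan/adopt helpers above never walk the callback lists: both ends of each list are tracked, with the tail kept as a pointer to the final ->next field, so an entire list can be spliced in O(1). A minimal user-space sketch of that tail-pointer splice is below; the cb and cb_list types and helper names are invented for the illustration and are not the kernel's rcu_head segmented-list machinery.

#include <stdio.h>
#include <stdlib.h>

struct cb {
        struct cb *next;
        int id;
};

/* A list tracked by a head pointer and a pointer to the final ->next field. */
struct cb_list {
        struct cb *head;
        struct cb **tail;       /* == &head when the list is empty */
};

static void cb_list_init(struct cb_list *l)
{
        l->head = NULL;
        l->tail = &l->head;
}

static void cb_list_enqueue(struct cb_list *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;           /* link after the current last element (or head) */
        l->tail = &c->next;     /* the new element is now the last one */
}

/* Splice all of @src onto the end of @dst in O(1), emptying @src. */
static void cb_list_adopt(struct cb_list *dst, struct cb_list *src)
{
        if (src->head == NULL)
                return;
        *dst->tail = src->head;
        dst->tail = src->tail;
        cb_list_init(src);
}

int main(void)
{
        struct cb_list mine, orphanage;
        struct cb *c;
        int i;

        cb_list_init(&mine);
        cb_list_init(&orphanage);
        for (i = 0; i < 3; i++) {
                c = malloc(sizeof(*c));
                c->id = i;
                cb_list_enqueue(&orphanage, c);
        }
        cb_list_adopt(&mine, &orphanage);
        for (c = mine.head; c != NULL; c = c->next)
                printf("adopted callback %d\n", c->id);
        return 0;
}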
1667 1667
1668 /* 1668 /*
1669 * Trace the fact that this CPU is going offline. 1669 * Trace the fact that this CPU is going offline.
1670 */ 1670 */
1671 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 1671 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1672 { 1672 {
1673 RCU_TRACE(unsigned long mask); 1673 RCU_TRACE(unsigned long mask);
1674 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); 1674 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
1675 RCU_TRACE(struct rcu_node *rnp = rdp->mynode); 1675 RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
1676 1676
1677 RCU_TRACE(mask = rdp->grpmask); 1677 RCU_TRACE(mask = rdp->grpmask);
1678 trace_rcu_grace_period(rsp->name, 1678 trace_rcu_grace_period(rsp->name,
1679 rnp->gpnum + 1 - !!(rnp->qsmask & mask), 1679 rnp->gpnum + 1 - !!(rnp->qsmask & mask),
1680 "cpuofl"); 1680 "cpuofl");
1681 } 1681 }
1682 1682
1683 /* 1683 /*
1684 * The CPU has been completely removed, and some other CPU is reporting 1684 * The CPU has been completely removed, and some other CPU is reporting
1685 * this fact from process context. Do the remainder of the cleanup, 1685 * this fact from process context. Do the remainder of the cleanup,
1686 * including orphaning the outgoing CPU's RCU callbacks, and also 1686 * including orphaning the outgoing CPU's RCU callbacks, and also
1687 * adopting them. There can only be one CPU hotplug operation at a time, 1687 * adopting them. There can only be one CPU hotplug operation at a time,
1688 * so no other CPU can be attempting to update rcu_cpu_kthread_task. 1688 * so no other CPU can be attempting to update rcu_cpu_kthread_task.
1689 */ 1689 */
1690 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 1690 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1691 { 1691 {
1692 unsigned long flags; 1692 unsigned long flags;
1693 unsigned long mask; 1693 unsigned long mask;
1694 int need_report = 0; 1694 int need_report = 0;
1695 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1695 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1696 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 1696 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
1697 1697
1698 /* Adjust any no-longer-needed kthreads. */ 1698 /* Adjust any no-longer-needed kthreads. */
1699 rcu_boost_kthread_setaffinity(rnp, -1); 1699 rcu_boost_kthread_setaffinity(rnp, -1);
1700 1700
1701 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ 1701 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
1702 1702
1703 /* Exclude any attempts to start a new grace period. */ 1703 /* Exclude any attempts to start a new grace period. */
1704 mutex_lock(&rsp->onoff_mutex); 1704 mutex_lock(&rsp->onoff_mutex);
1705 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1705 raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
1706 1706
1707 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ 1707 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
1708 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); 1708 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
1709 rcu_adopt_orphan_cbs(rsp); 1709 rcu_adopt_orphan_cbs(rsp);
1710 1710
1711 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 1711 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1712 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1712 mask = rdp->grpmask; /* rnp->grplo is constant. */
1713 do { 1713 do {
1714 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1714 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1715 rnp->qsmaskinit &= ~mask; 1715 rnp->qsmaskinit &= ~mask;
1716 if (rnp->qsmaskinit != 0) { 1716 if (rnp->qsmaskinit != 0) {
1717 if (rnp != rdp->mynode) 1717 if (rnp != rdp->mynode)
1718 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1718 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1719 break; 1719 break;
1720 } 1720 }
1721 if (rnp == rdp->mynode) 1721 if (rnp == rdp->mynode)
1722 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1722 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1723 else 1723 else
1724 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1724 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1725 mask = rnp->grpmask; 1725 mask = rnp->grpmask;
1726 rnp = rnp->parent; 1726 rnp = rnp->parent;
1727 } while (rnp != NULL); 1727 } while (rnp != NULL);
1728 1728
1729 /* 1729 /*
1730 * We still hold the leaf rcu_node structure lock here, and 1730 * We still hold the leaf rcu_node structure lock here, and
1731 * irqs are still disabled. The reason for this subterfuge is 1731 * irqs are still disabled. The reason for this subterfuge is
1732 * that invoking rcu_report_unblock_qs_rnp() with ->onofflock 1732 * that invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
1733 * held leads to deadlock. 1733 * held leads to deadlock.
1734 */ 1734 */
1735 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1735 raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
1736 rnp = rdp->mynode; 1736 rnp = rdp->mynode;
1737 if (need_report & RCU_OFL_TASKS_NORM_GP) 1737 if (need_report & RCU_OFL_TASKS_NORM_GP)
1738 rcu_report_unblock_qs_rnp(rnp, flags); 1738 rcu_report_unblock_qs_rnp(rnp, flags);
1739 else 1739 else
1740 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1740 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1741 if (need_report & RCU_OFL_TASKS_EXP_GP) 1741 if (need_report & RCU_OFL_TASKS_EXP_GP)
1742 rcu_report_exp_rnp(rsp, rnp, true); 1742 rcu_report_exp_rnp(rsp, rnp, true);
1743 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, 1743 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
1744 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", 1744 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
1745 cpu, rdp->qlen, rdp->nxtlist); 1745 cpu, rdp->qlen, rdp->nxtlist);
1746 init_callback_list(rdp); 1746 init_callback_list(rdp);
1747 /* Disallow further callbacks on this CPU. */ 1747 /* Disallow further callbacks on this CPU. */
1748 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 1748 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
1749 mutex_unlock(&rsp->onoff_mutex); 1749 mutex_unlock(&rsp->onoff_mutex);
1750 } 1750 }
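rcu_cleanup_dead_cpu() clears the outgoing CPU's bit in its leaf rcu_node and then walks toward the root, clearing a node's bit in its parent only when that node has just become empty; a node that still has other CPUs (or blocked tasks) stops the walk. The following toy sketch shows the same bottom-up bit-propagation rule on a two-level tree; the toy_node layout, field names, and the absence of locking are simplifications for illustration only.

#include <stdio.h>

struct toy_node {
        struct toy_node *parent;
        unsigned long mask;     /* one bit per child (or per CPU at a leaf) */
        unsigned long grpmask;  /* this node's bit in its parent's mask */
};

/* Clear @bit at @leaf; propagate upward only while a node becomes empty. */
static void clear_cpu_bit(struct toy_node *leaf, unsigned long bit)
{
        struct toy_node *np = leaf;

        while (np != NULL) {
                np->mask &= ~bit;
                if (np->mask != 0)
                        break;          /* siblings remain: parent unchanged */
                bit = np->grpmask;      /* subtree empty: clear our bit above */
                np = np->parent;
        }
}

int main(void)
{
        struct toy_node root = { .parent = NULL, .mask = 0x3 };
        struct toy_node leaf0 = { .parent = &root, .mask = 0xf, .grpmask = 0x1 };

        clear_cpu_bit(&leaf0, 0x1);     /* leaf still has CPUs 1-3: root keeps bit */
        printf("leaf0.mask=%#lx root.mask=%#lx\n", leaf0.mask, root.mask);
        clear_cpu_bit(&leaf0, 0xe);     /* leaf now empty: root bit cleared too */
        printf("leaf0.mask=%#lx root.mask=%#lx\n", leaf0.mask, root.mask);
        return 0;
}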
1751 1751
1752 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 1752 #else /* #ifdef CONFIG_HOTPLUG_CPU */
1753 1753
1754 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 1754 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1755 { 1755 {
1756 } 1756 }
1757 1757
1758 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 1758 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1759 { 1759 {
1760 } 1760 }
1761 1761
1762 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ 1762 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1763 1763
1764 /* 1764 /*
1765 * Invoke any RCU callbacks that have made it to the end of their grace 1765 * Invoke any RCU callbacks that have made it to the end of their grace
1766 * period. Throttle as specified by rdp->blimit. 1766 * period. Throttle as specified by rdp->blimit.
1767 */ 1767 */
1768 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 1768 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1769 { 1769 {
1770 unsigned long flags; 1770 unsigned long flags;
1771 struct rcu_head *next, *list, **tail; 1771 struct rcu_head *next, *list, **tail;
1772 int bl, count, count_lazy, i; 1772 int bl, count, count_lazy, i;
1773 1773
1774 /* If no callbacks are ready, just return. */ 1774 /* If no callbacks are ready, just return. */
1775 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1775 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1776 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); 1776 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1777 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), 1777 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
1778 need_resched(), is_idle_task(current), 1778 need_resched(), is_idle_task(current),
1779 rcu_is_callbacks_kthread()); 1779 rcu_is_callbacks_kthread());
1780 return; 1780 return;
1781 } 1781 }
1782 1782
1783 /* 1783 /*
1784 * Extract the list of ready callbacks, disabling to prevent 1784 * Extract the list of ready callbacks, disabling to prevent
1785 * races with call_rcu() from interrupt handlers. 1785 * races with call_rcu() from interrupt handlers.
1786 */ 1786 */
1787 local_irq_save(flags); 1787 local_irq_save(flags);
1788 WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); 1788 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
1789 bl = rdp->blimit; 1789 bl = rdp->blimit;
1790 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); 1790 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
1791 list = rdp->nxtlist; 1791 list = rdp->nxtlist;
1792 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1792 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1793 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1793 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1794 tail = rdp->nxttail[RCU_DONE_TAIL]; 1794 tail = rdp->nxttail[RCU_DONE_TAIL];
1795 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) 1795 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
1796 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) 1796 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1797 rdp->nxttail[i] = &rdp->nxtlist; 1797 rdp->nxttail[i] = &rdp->nxtlist;
1798 local_irq_restore(flags); 1798 local_irq_restore(flags);
1799 1799
1800 /* Invoke callbacks. */ 1800 /* Invoke callbacks. */
1801 count = count_lazy = 0; 1801 count = count_lazy = 0;
1802 while (list) { 1802 while (list) {
1803 next = list->next; 1803 next = list->next;
1804 prefetch(next); 1804 prefetch(next);
1805 debug_rcu_head_unqueue(list); 1805 debug_rcu_head_unqueue(list);
1806 if (__rcu_reclaim(rsp->name, list)) 1806 if (__rcu_reclaim(rsp->name, list))
1807 count_lazy++; 1807 count_lazy++;
1808 list = next; 1808 list = next;
1809 /* Stop only if limit reached and CPU has something to do. */ 1809 /* Stop only if limit reached and CPU has something to do. */
1810 if (++count >= bl && 1810 if (++count >= bl &&
1811 (need_resched() || 1811 (need_resched() ||
1812 (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) 1812 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
1813 break; 1813 break;
1814 } 1814 }
1815 1815
1816 local_irq_save(flags); 1816 local_irq_save(flags);
1817 trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), 1817 trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
1818 is_idle_task(current), 1818 is_idle_task(current),
1819 rcu_is_callbacks_kthread()); 1819 rcu_is_callbacks_kthread());
1820 1820
1821 /* Update count, and requeue any remaining callbacks. */ 1821 /* Update count, and requeue any remaining callbacks. */
1822 if (list != NULL) { 1822 if (list != NULL) {
1823 *tail = rdp->nxtlist; 1823 *tail = rdp->nxtlist;
1824 rdp->nxtlist = list; 1824 rdp->nxtlist = list;
1825 for (i = 0; i < RCU_NEXT_SIZE; i++) 1825 for (i = 0; i < RCU_NEXT_SIZE; i++)
1826 if (&rdp->nxtlist == rdp->nxttail[i]) 1826 if (&rdp->nxtlist == rdp->nxttail[i])
1827 rdp->nxttail[i] = tail; 1827 rdp->nxttail[i] = tail;
1828 else 1828 else
1829 break; 1829 break;
1830 } 1830 }
1831 smp_mb(); /* List handling before counting for rcu_barrier(). */ 1831 smp_mb(); /* List handling before counting for rcu_barrier(). */
1832 rdp->qlen_lazy -= count_lazy; 1832 rdp->qlen_lazy -= count_lazy;
1833 ACCESS_ONCE(rdp->qlen) -= count; 1833 ACCESS_ONCE(rdp->qlen) -= count;
1834 rdp->n_cbs_invoked += count; 1834 rdp->n_cbs_invoked += count;
1835 1835
1836 /* Reinstate batch limit if we have worked down the excess. */ 1836 /* Reinstate batch limit if we have worked down the excess. */
1837 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 1837 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1838 rdp->blimit = blimit; 1838 rdp->blimit = blimit;
1839 1839
1840 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 1840 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
1841 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 1841 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
1842 rdp->qlen_last_fqs_check = 0; 1842 rdp->qlen_last_fqs_check = 0;
1843 rdp->n_force_qs_snap = rsp->n_force_qs; 1843 rdp->n_force_qs_snap = rsp->n_force_qs;
1844 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1844 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1845 rdp->qlen_last_fqs_check = rdp->qlen; 1845 rdp->qlen_last_fqs_check = rdp->qlen;
1846 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); 1846 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
1847 1847
1848 local_irq_restore(flags); 1848 local_irq_restore(flags);
1849 1849
1850 /* Re-invoke RCU core processing if there are callbacks remaining. */ 1850 /* Re-invoke RCU core processing if there are callbacks remaining. */
1851 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1851 if (cpu_has_callbacks_ready_to_invoke(rdp))
1852 invoke_rcu_core(); 1852 invoke_rcu_core();
1853 } 1853 }
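rcu_do_batch() detaches only the ready-to-invoke sublist, invokes callbacks up to ->blimit if the CPU has other work to do, and requeues whatever remains for a later pass. A stripped-down illustration of that invoke-up-to-a-limit-then-requeue loop follows, using a stand-in cb type and no interrupt masking or tracing; it is a sketch of the pattern, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

struct cb {
        struct cb *next;
        void (*func)(struct cb *);
};

static void print_cb(struct cb *c)
{
        printf("invoked %p\n", (void *)c);
        free(c);
}

/*
 * Invoke at most @blimit callbacks from *@listp, returning how many ran.
 * Whatever was not invoked is left on the list for a later pass.
 */
static int do_batch(struct cb **listp, int blimit)
{
        struct cb *list = *listp;
        struct cb *next;
        int count = 0;

        while (list != NULL && count < blimit) {
                next = list->next;
                list->func(list);
                list = next;
                count++;
        }
        *listp = list;          /* requeue any remaining callbacks */
        return count;
}

int main(void)
{
        struct cb *head = NULL, *c;
        int i, n;

        for (i = 0; i < 5; i++) {
                c = malloc(sizeof(*c));
                c->func = print_cb;
                c->next = head; /* insertion order does not matter for the demo */
                head = c;
        }
        while ((n = do_batch(&head, 2)) != 0)
                printf("batch invoked %d callback(s)\n", n);
        return 0;
}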
1854 1854
1855 /* 1855 /*
1856 * Check to see if this CPU is in a non-context-switch quiescent state 1856 * Check to see if this CPU is in a non-context-switch quiescent state
1857 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1857 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1858 * Also schedule RCU core processing. 1858 * Also schedule RCU core processing.
1859 * 1859 *
1860 * This function must be called from hardirq context. It is normally 1860 * This function must be called from hardirq context. It is normally
1861 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1861 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1862 * false, there is no point in invoking rcu_check_callbacks(). 1862 * false, there is no point in invoking rcu_check_callbacks().
1863 */ 1863 */
1864 void rcu_check_callbacks(int cpu, int user) 1864 void rcu_check_callbacks(int cpu, int user)
1865 { 1865 {
1866 trace_rcu_utilization("Start scheduler-tick"); 1866 trace_rcu_utilization("Start scheduler-tick");
1867 increment_cpu_stall_ticks(); 1867 increment_cpu_stall_ticks();
1868 if (user || rcu_is_cpu_rrupt_from_idle()) { 1868 if (user || rcu_is_cpu_rrupt_from_idle()) {
1869 1869
1870 /* 1870 /*
1871 * Get here if this CPU took its interrupt from user 1871 * Get here if this CPU took its interrupt from user
1872 * mode or from the idle loop, and if this is not a 1872 * mode or from the idle loop, and if this is not a
1873 * nested interrupt. In this case, the CPU is in 1873 * nested interrupt. In this case, the CPU is in
1874 * a quiescent state, so note it. 1874 * a quiescent state, so note it.
1875 * 1875 *
1876 * No memory barrier is required here because both 1876 * No memory barrier is required here because both
1877 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local 1877 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
1878 * variables that other CPUs neither access nor modify, 1878 * variables that other CPUs neither access nor modify,
1879 * at least not while the corresponding CPU is online. 1879 * at least not while the corresponding CPU is online.
1880 */ 1880 */
1881 1881
1882 rcu_sched_qs(cpu); 1882 rcu_sched_qs(cpu);
1883 rcu_bh_qs(cpu); 1883 rcu_bh_qs(cpu);
1884 1884
1885 } else if (!in_softirq()) { 1885 } else if (!in_softirq()) {
1886 1886
1887 /* 1887 /*
1888 * Get here if this CPU did not take its interrupt from 1888 * Get here if this CPU did not take its interrupt from
1889 * softirq, in other words, if it is not interrupting 1889 * softirq, in other words, if it is not interrupting
1890 * a rcu_bh read-side critical section. This is an _bh 1890 * a rcu_bh read-side critical section. This is an _bh
1891 * critical section, so note it. 1891 * critical section, so note it.
1892 */ 1892 */
1893 1893
1894 rcu_bh_qs(cpu); 1894 rcu_bh_qs(cpu);
1895 } 1895 }
1896 rcu_preempt_check_callbacks(cpu); 1896 rcu_preempt_check_callbacks(cpu);
1897 if (rcu_pending(cpu)) 1897 if (rcu_pending(cpu))
1898 invoke_rcu_core(); 1898 invoke_rcu_core();
1899 trace_rcu_utilization("End scheduler-tick"); 1899 trace_rcu_utilization("End scheduler-tick");
1900 } 1900 }
1901 1901
1902 /* 1902 /*
1903 * Scan the leaf rcu_node structures, processing dyntick state for any that 1903 * Scan the leaf rcu_node structures, processing dyntick state for any that
1904 * have not yet encountered a quiescent state, using the function specified. 1904 * have not yet encountered a quiescent state, using the function specified.
1905 * Also initiate boosting for any threads blocked on the root rcu_node. 1905 * Also initiate boosting for any threads blocked on the root rcu_node.
1906 * 1906 *
1907 * The caller must have suppressed start of new grace periods. 1907 * The caller must have suppressed start of new grace periods.
1908 */ 1908 */
1909 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1909 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1910 { 1910 {
1911 unsigned long bit; 1911 unsigned long bit;
1912 int cpu; 1912 int cpu;
1913 unsigned long flags; 1913 unsigned long flags;
1914 unsigned long mask; 1914 unsigned long mask;
1915 struct rcu_node *rnp; 1915 struct rcu_node *rnp;
1916 1916
1917 rcu_for_each_leaf_node(rsp, rnp) { 1917 rcu_for_each_leaf_node(rsp, rnp) {
1918 cond_resched(); 1918 cond_resched();
1919 mask = 0; 1919 mask = 0;
1920 raw_spin_lock_irqsave(&rnp->lock, flags); 1920 raw_spin_lock_irqsave(&rnp->lock, flags);
1921 if (!rcu_gp_in_progress(rsp)) { 1921 if (!rcu_gp_in_progress(rsp)) {
1922 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1922 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1923 return; 1923 return;
1924 } 1924 }
1925 if (rnp->qsmask == 0) { 1925 if (rnp->qsmask == 0) {
1926 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 1926 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
1927 continue; 1927 continue;
1928 } 1928 }
1929 cpu = rnp->grplo; 1929 cpu = rnp->grplo;
1930 bit = 1; 1930 bit = 1;
1931 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1931 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1932 if ((rnp->qsmask & bit) != 0 && 1932 if ((rnp->qsmask & bit) != 0 &&
1933 f(per_cpu_ptr(rsp->rda, cpu))) 1933 f(per_cpu_ptr(rsp->rda, cpu)))
1934 mask |= bit; 1934 mask |= bit;
1935 } 1935 }
1936 if (mask != 0) { 1936 if (mask != 0) {
1937 1937
1938 /* rcu_report_qs_rnp() releases rnp->lock. */ 1938 /* rcu_report_qs_rnp() releases rnp->lock. */
1939 rcu_report_qs_rnp(mask, rsp, rnp, flags); 1939 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1940 continue; 1940 continue;
1941 } 1941 }
1942 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1942 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1943 } 1943 }
1944 rnp = rcu_get_root(rsp); 1944 rnp = rcu_get_root(rsp);
1945 if (rnp->qsmask == 0) { 1945 if (rnp->qsmask == 0) {
1946 raw_spin_lock_irqsave(&rnp->lock, flags); 1946 raw_spin_lock_irqsave(&rnp->lock, flags);
1947 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1947 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1948 } 1948 }
1949 } 1949 }
1950 1950
1951 /* 1951 /*
1952 * Force quiescent states on reluctant CPUs, and also detect which 1952 * Force quiescent states on reluctant CPUs, and also detect which
1953 * CPUs are in dyntick-idle mode. 1953 * CPUs are in dyntick-idle mode.
1954 */ 1954 */
1955 static void force_quiescent_state(struct rcu_state *rsp) 1955 static void force_quiescent_state(struct rcu_state *rsp)
1956 { 1956 {
1957 unsigned long flags; 1957 unsigned long flags;
1958 bool ret; 1958 bool ret;
1959 struct rcu_node *rnp; 1959 struct rcu_node *rnp;
1960 struct rcu_node *rnp_old = NULL; 1960 struct rcu_node *rnp_old = NULL;
1961 1961
1962 /* Funnel through hierarchy to reduce memory contention. */ 1962 /* Funnel through hierarchy to reduce memory contention. */
1963 rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode; 1963 rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
1964 for (; rnp != NULL; rnp = rnp->parent) { 1964 for (; rnp != NULL; rnp = rnp->parent) {
1965 ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || 1965 ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
1966 !raw_spin_trylock(&rnp->fqslock); 1966 !raw_spin_trylock(&rnp->fqslock);
1967 if (rnp_old != NULL) 1967 if (rnp_old != NULL)
1968 raw_spin_unlock(&rnp_old->fqslock); 1968 raw_spin_unlock(&rnp_old->fqslock);
1969 if (ret) { 1969 if (ret) {
1970 rsp->n_force_qs_lh++; 1970 rsp->n_force_qs_lh++;
1971 return; 1971 return;
1972 } 1972 }
1973 rnp_old = rnp; 1973 rnp_old = rnp;
1974 } 1974 }
1975 /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ 1975 /* rnp_old == rcu_get_root(rsp), rnp == NULL. */
1976 1976
1977 /* Reached the root of the rcu_node tree, acquire lock. */ 1977 /* Reached the root of the rcu_node tree, acquire lock. */
1978 raw_spin_lock_irqsave(&rnp_old->lock, flags); 1978 raw_spin_lock_irqsave(&rnp_old->lock, flags);
1979 raw_spin_unlock(&rnp_old->fqslock); 1979 raw_spin_unlock(&rnp_old->fqslock);
1980 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1980 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1981 rsp->n_force_qs_lh++; 1981 rsp->n_force_qs_lh++;
1982 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 1982 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
1983 return; /* Someone beat us to it. */ 1983 return; /* Someone beat us to it. */
1984 } 1984 }
1985 rsp->gp_flags |= RCU_GP_FLAG_FQS; 1985 rsp->gp_flags |= RCU_GP_FLAG_FQS;
1986 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 1986 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
1987 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 1987 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
1988 } 1988 }
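force_quiescent_state() funnels through the rcu_node tree to keep lock contention down: each level's ->fqslock is trylocked, the lock from the level below is dropped, and a task that loses the trylock gives up because some other task higher in the funnel is already doing the work. A user-space sketch of that funnel-locking idea using pthread mutexes is shown below, with an invented funnel_node type and only two levels; it illustrates the pattern rather than the kernel implementation.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct funnel_node {
        pthread_mutex_t fqslock;
        struct funnel_node *parent;
};

/*
 * Climb from @leaf toward the root, trylocking each level and releasing
 * the level below.  Return true if we reached the root holding its lock;
 * false if another climber already owns some level above us.
 */
static bool funnel_climb(struct funnel_node *leaf)
{
        struct funnel_node *np, *np_old = NULL;

        for (np = leaf; np != NULL; np = np->parent) {
                bool busy = pthread_mutex_trylock(&np->fqslock) != 0;

                if (np_old != NULL)
                        pthread_mutex_unlock(&np_old->fqslock);
                if (busy)
                        return false;   /* someone above is already doing the work */
                np_old = np;
        }
        /* np_old is the root and we hold its fqslock; do the work, then drop it. */
        pthread_mutex_unlock(&np_old->fqslock);
        return true;
}

int main(void)
{
        struct funnel_node root = { PTHREAD_MUTEX_INITIALIZER, NULL };
        struct funnel_node leaf = { PTHREAD_MUTEX_INITIALIZER, &root };

        printf("reached root: %s\n", funnel_climb(&leaf) ? "yes" : "no");
        return 0;
}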
1989 1989
1990 /* 1990 /*
1991 * This does the RCU core processing work for the specified rcu_state 1991 * This does the RCU core processing work for the specified rcu_state
1992 * and rcu_data structures. This may be called only from the CPU to 1992 * and rcu_data structures. This may be called only from the CPU to
1993 * whom the rdp belongs. 1993 * whom the rdp belongs.
1994 */ 1994 */
1995 static void 1995 static void
1996 __rcu_process_callbacks(struct rcu_state *rsp) 1996 __rcu_process_callbacks(struct rcu_state *rsp)
1997 { 1997 {
1998 unsigned long flags; 1998 unsigned long flags;
1999 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 1999 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2000 2000
2001 WARN_ON_ONCE(rdp->beenonline == 0); 2001 WARN_ON_ONCE(rdp->beenonline == 0);
2002 2002
2003 /* 2003 /*
2004 * Advance callbacks in response to end of earlier grace 2004 * Advance callbacks in response to end of earlier grace
2005 * period that some other CPU ended. 2005 * period that some other CPU ended.
2006 */ 2006 */
2007 rcu_process_gp_end(rsp, rdp); 2007 rcu_process_gp_end(rsp, rdp);
2008 2008
2009 /* Update RCU state based on any recent quiescent states. */ 2009 /* Update RCU state based on any recent quiescent states. */
2010 rcu_check_quiescent_state(rsp, rdp); 2010 rcu_check_quiescent_state(rsp, rdp);
2011 2011
2012 /* Does this CPU require a not-yet-started grace period? */ 2012 /* Does this CPU require a not-yet-started grace period? */
2013 if (cpu_needs_another_gp(rsp, rdp)) { 2013 if (cpu_needs_another_gp(rsp, rdp)) {
2014 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 2014 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
2015 rcu_start_gp(rsp, flags); /* releases above lock */ 2015 rcu_start_gp(rsp, flags); /* releases above lock */
2016 } 2016 }
2017 2017
2018 /* If there are callbacks ready, invoke them. */ 2018 /* If there are callbacks ready, invoke them. */
2019 if (cpu_has_callbacks_ready_to_invoke(rdp)) 2019 if (cpu_has_callbacks_ready_to_invoke(rdp))
2020 invoke_rcu_callbacks(rsp, rdp); 2020 invoke_rcu_callbacks(rsp, rdp);
2021 } 2021 }
2022 2022
2023 /* 2023 /*
2024 * Do RCU core processing for the current CPU. 2024 * Do RCU core processing for the current CPU.
2025 */ 2025 */
2026 static void rcu_process_callbacks(struct softirq_action *unused) 2026 static void rcu_process_callbacks(struct softirq_action *unused)
2027 { 2027 {
2028 struct rcu_state *rsp; 2028 struct rcu_state *rsp;
2029 2029
2030 if (cpu_is_offline(smp_processor_id())) 2030 if (cpu_is_offline(smp_processor_id()))
2031 return; 2031 return;
2032 trace_rcu_utilization("Start RCU core"); 2032 trace_rcu_utilization("Start RCU core");
2033 for_each_rcu_flavor(rsp) 2033 for_each_rcu_flavor(rsp)
2034 __rcu_process_callbacks(rsp); 2034 __rcu_process_callbacks(rsp);
2035 trace_rcu_utilization("End RCU core"); 2035 trace_rcu_utilization("End RCU core");
2036 } 2036 }
2037 2037
2038 /* 2038 /*
2039 * Schedule RCU callback invocation. If the specified type of RCU 2039 * Schedule RCU callback invocation. If the specified type of RCU
2040 * does not support RCU priority boosting, just do a direct call, 2040 * does not support RCU priority boosting, just do a direct call,
2041 * otherwise wake up the per-CPU kernel kthread. Note that because we 2041 * otherwise wake up the per-CPU kernel kthread. Note that because we
2042 * are running on the current CPU with interrupts disabled, the 2042 * are running on the current CPU with interrupts disabled, the
2043 * rcu_cpu_kthread_task cannot disappear out from under us. 2043 * rcu_cpu_kthread_task cannot disappear out from under us.
2044 */ 2044 */
2045 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 2045 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
2046 { 2046 {
2047 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) 2047 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
2048 return; 2048 return;
2049 if (likely(!rsp->boost)) { 2049 if (likely(!rsp->boost)) {
2050 rcu_do_batch(rsp, rdp); 2050 rcu_do_batch(rsp, rdp);
2051 return; 2051 return;
2052 } 2052 }
2053 invoke_rcu_callbacks_kthread(); 2053 invoke_rcu_callbacks_kthread();
2054 } 2054 }
2055 2055
2056 static void invoke_rcu_core(void) 2056 static void invoke_rcu_core(void)
2057 { 2057 {
2058 raise_softirq(RCU_SOFTIRQ); 2058 raise_softirq(RCU_SOFTIRQ);
2059 } 2059 }
2060 2060
2061 /* 2061 /*
2062 * Handle any core-RCU processing required by a call_rcu() invocation. 2062 * Handle any core-RCU processing required by a call_rcu() invocation.
2063 */ 2063 */
2064 static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2064 static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2065 struct rcu_head *head, unsigned long flags) 2065 struct rcu_head *head, unsigned long flags)
2066 { 2066 {
2067 /* 2067 /*
2068 * If called from an extended quiescent state, invoke the RCU 2068 * If called from an extended quiescent state, invoke the RCU
2069 * core in order to force a re-evaluation of RCU's idleness. 2069 * core in order to force a re-evaluation of RCU's idleness.
2070 */ 2070 */
2071 if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) 2071 if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
2072 invoke_rcu_core(); 2072 invoke_rcu_core();
2073 2073
2074 /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ 2074 /* If interrupts were disabled or CPU offline, don't invoke RCU core. */
2075 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id())) 2075 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2076 return; 2076 return;
2077 2077
2078 /* 2078 /*
2079 * Force the grace period if too many callbacks or too long waiting. 2079 * Force the grace period if too many callbacks or too long waiting.
2080 * Enforce hysteresis, and don't invoke force_quiescent_state() 2080 * Enforce hysteresis, and don't invoke force_quiescent_state()
2081 * if some other CPU has recently done so. Also, don't bother 2081 * if some other CPU has recently done so. Also, don't bother
2082 * invoking force_quiescent_state() if the newly enqueued callback 2082 * invoking force_quiescent_state() if the newly enqueued callback
2083 * is the only one waiting for a grace period to complete. 2083 * is the only one waiting for a grace period to complete.
2084 */ 2084 */
2085 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 2085 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
2086 2086
2087 /* Are we ignoring a completed grace period? */ 2087 /* Are we ignoring a completed grace period? */
2088 rcu_process_gp_end(rsp, rdp); 2088 rcu_process_gp_end(rsp, rdp);
2089 check_for_new_grace_period(rsp, rdp); 2089 check_for_new_grace_period(rsp, rdp);
2090 2090
2091 /* Start a new grace period if one not already started. */ 2091 /* Start a new grace period if one not already started. */
2092 if (!rcu_gp_in_progress(rsp)) { 2092 if (!rcu_gp_in_progress(rsp)) {
2093 unsigned long nestflag; 2093 unsigned long nestflag;
2094 struct rcu_node *rnp_root = rcu_get_root(rsp); 2094 struct rcu_node *rnp_root = rcu_get_root(rsp);
2095 2095
2096 raw_spin_lock_irqsave(&rnp_root->lock, nestflag); 2096 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
2097 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ 2097 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
2098 } else { 2098 } else {
2099 /* Give the grace period a kick. */ 2099 /* Give the grace period a kick. */
2100 rdp->blimit = LONG_MAX; 2100 rdp->blimit = LONG_MAX;
2101 if (rsp->n_force_qs == rdp->n_force_qs_snap && 2101 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
2102 *rdp->nxttail[RCU_DONE_TAIL] != head) 2102 *rdp->nxttail[RCU_DONE_TAIL] != head)
2103 force_quiescent_state(rsp); 2103 force_quiescent_state(rsp);
2104 rdp->n_force_qs_snap = rsp->n_force_qs; 2104 rdp->n_force_qs_snap = rsp->n_force_qs;
2105 rdp->qlen_last_fqs_check = rdp->qlen; 2105 rdp->qlen_last_fqs_check = rdp->qlen;
2106 } 2106 }
2107 } 2107 }
2108 } 2108 }
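The enqueue-time check above applies hysteresis to the callback-queue length: forcing is considered only once ->qlen has grown qhimark beyond the last checkpoint, and (as seen earlier in rcu_do_batch()) the checkpoint is reset only after the queue drains, so a queue hovering near the threshold does not trigger forcing on every enqueue. Below is a tiny self-contained illustration of that high/low-watermark idea; the constants, struct, and function names are chosen for the example and are not the kernel's tunables.

#include <stdbool.h>
#include <stdio.h>

#define QHIMARK 10000   /* trigger once we are this far past the checkpoint */
#define QLOWMARK 100    /* consider the backlog drained below this level */

struct queue_state {
        long qlen;                      /* current queue length */
        long qlen_last_check;           /* checkpoint taken at the last trigger */
};

/* Returns true when the caller should take the expensive "force" action. */
static bool should_force(struct queue_state *q)
{
        if (q->qlen > q->qlen_last_check + QHIMARK) {
                q->qlen_last_check = q->qlen;   /* re-arm well above current level */
                return true;
        }
        if (q->qlen < QLOWMARK)
                q->qlen_last_check = 0;         /* drained: reset the checkpoint */
        return false;
}

int main(void)
{
        struct queue_state q = { 0, 0 };
        long i;

        for (i = 1; i <= 30000; i++) {
                q.qlen = i;
                if (should_force(&q))
                        printf("forcing at qlen=%ld\n", q.qlen);
        }
        return 0;
}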
2109 2109
2110 static void 2110 static void
2111 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 2111 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
2112 struct rcu_state *rsp, bool lazy) 2112 struct rcu_state *rsp, bool lazy)
2113 { 2113 {
2114 unsigned long flags; 2114 unsigned long flags;
2115 struct rcu_data *rdp; 2115 struct rcu_data *rdp;
2116 2116
2117 WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */ 2117 WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
2118 debug_rcu_head_queue(head); 2118 debug_rcu_head_queue(head);
2119 head->func = func; 2119 head->func = func;
2120 head->next = NULL; 2120 head->next = NULL;
2121 2121
2122 /* 2122 /*
2123 * Opportunistically note grace-period endings and beginnings. 2123 * Opportunistically note grace-period endings and beginnings.
2124 * Note that we might see a beginning right after we see an 2124 * Note that we might see a beginning right after we see an
2125 * end, but never vice versa, since this CPU has to pass through 2125 * end, but never vice versa, since this CPU has to pass through
2126 * a quiescent state betweentimes. 2126 * a quiescent state betweentimes.
2127 */ 2127 */
2128 local_irq_save(flags); 2128 local_irq_save(flags);
2129 rdp = this_cpu_ptr(rsp->rda); 2129 rdp = this_cpu_ptr(rsp->rda);
2130 2130
2131 /* Add the callback to our list. */ 2131 /* Add the callback to our list. */
2132 if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) { 2132 if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
2133 /* __call_rcu() is illegal on offline CPU; leak the callback. */ 2133 /* __call_rcu() is illegal on offline CPU; leak the callback. */
2134 WARN_ON_ONCE(1); 2134 WARN_ON_ONCE(1);
2135 local_irq_restore(flags); 2135 local_irq_restore(flags);
2136 return; 2136 return;
2137 } 2137 }
2138 ACCESS_ONCE(rdp->qlen)++; 2138 ACCESS_ONCE(rdp->qlen)++;
2139 if (lazy) 2139 if (lazy)
2140 rdp->qlen_lazy++; 2140 rdp->qlen_lazy++;
2141 else 2141 else
2142 rcu_idle_count_callbacks_posted(); 2142 rcu_idle_count_callbacks_posted();
2143 smp_mb(); /* Count before adding callback for rcu_barrier(). */ 2143 smp_mb(); /* Count before adding callback for rcu_barrier(). */
2144 *rdp->nxttail[RCU_NEXT_TAIL] = head; 2144 *rdp->nxttail[RCU_NEXT_TAIL] = head;
2145 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 2145 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
2146 2146
2147 if (__is_kfree_rcu_offset((unsigned long)func)) 2147 if (__is_kfree_rcu_offset((unsigned long)func))
2148 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 2148 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
2149 rdp->qlen_lazy, rdp->qlen); 2149 rdp->qlen_lazy, rdp->qlen);
2150 else 2150 else
2151 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); 2151 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
2152 2152
2153 /* Go handle any RCU core processing required. */ 2153 /* Go handle any RCU core processing required. */
2154 __call_rcu_core(rsp, rdp, head, flags); 2154 __call_rcu_core(rsp, rdp, head, flags);
2155 local_irq_restore(flags); 2155 local_irq_restore(flags);
2156 } 2156 }
2157 2157
2158 /* 2158 /*
2159 * Queue an RCU-sched callback for invocation after a grace period. 2159 * Queue an RCU-sched callback for invocation after a grace period.
2160 */ 2160 */
2161 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 2161 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2162 { 2162 {
2163 __call_rcu(head, func, &rcu_sched_state, 0); 2163 __call_rcu(head, func, &rcu_sched_state, 0);
2164 } 2164 }
2165 EXPORT_SYMBOL_GPL(call_rcu_sched); 2165 EXPORT_SYMBOL_GPL(call_rcu_sched);
2166 2166
2167 /* 2167 /*
2168 * Queue an RCU callback for invocation after a quicker grace period. 2168 * Queue an RCU callback for invocation after a quicker grace period.
2169 */ 2169 */
2170 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 2170 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2171 { 2171 {
2172 __call_rcu(head, func, &rcu_bh_state, 0); 2172 __call_rcu(head, func, &rcu_bh_state, 0);
2173 } 2173 }
2174 EXPORT_SYMBOL_GPL(call_rcu_bh); 2174 EXPORT_SYMBOL_GPL(call_rcu_bh);
2175 2175
2176 /* 2176 /*
2177 * Because a context switch is a grace period for RCU-sched and RCU-bh, 2177 * Because a context switch is a grace period for RCU-sched and RCU-bh,
2178 * any blocking grace-period wait automatically implies a grace period 2178 * any blocking grace-period wait automatically implies a grace period
2179 * if there is only one CPU online at any point in time during execution 2179 * if there is only one CPU online at any point in time during execution
2180 * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to 2180 * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
2181 * occasionally incorrectly indicate that there are multiple CPUs online 2181 * occasionally incorrectly indicate that there are multiple CPUs online
2182 * when there was in fact only one the whole time, as this just adds 2182 * when there was in fact only one the whole time, as this just adds
2183 * some overhead: RCU still operates correctly. 2183 * some overhead: RCU still operates correctly.
2184 */ 2184 */
2185 static inline int rcu_blocking_is_gp(void) 2185 static inline int rcu_blocking_is_gp(void)
2186 { 2186 {
2187 int ret; 2187 int ret;
2188 2188
2189 might_sleep(); /* Check for RCU read-side critical section. */ 2189 might_sleep(); /* Check for RCU read-side critical section. */
2190 preempt_disable(); 2190 preempt_disable();
2191 ret = num_online_cpus() <= 1; 2191 ret = num_online_cpus() <= 1;
2192 preempt_enable(); 2192 preempt_enable();
2193 return ret; 2193 return ret;
2194 } 2194 }
2195 2195
2196 /** 2196 /**
2197 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 2197 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
2198 * 2198 *
2199 * Control will return to the caller some time after a full rcu-sched 2199 * Control will return to the caller some time after a full rcu-sched
2200 * grace period has elapsed, in other words after all currently executing 2200 * grace period has elapsed, in other words after all currently executing
2201 * rcu-sched read-side critical sections have completed. These read-side 2201 * rcu-sched read-side critical sections have completed. These read-side
2202 * critical sections are delimited by rcu_read_lock_sched() and 2202 * critical sections are delimited by rcu_read_lock_sched() and
2203 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 2203 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
2204 * local_irq_disable(), and so on may be used in place of 2204 * local_irq_disable(), and so on may be used in place of
2205 * rcu_read_lock_sched(). 2205 * rcu_read_lock_sched().
2206 * 2206 *
2207 * This means that all preempt_disable code sequences, including NMI and 2207 * This means that all preempt_disable code sequences, including NMI and
2208 * hardware-interrupt handlers, in progress on entry will have completed 2208 * hardware-interrupt handlers, in progress on entry will have completed
2209 * before this primitive returns. However, this does not guarantee that 2209 * before this primitive returns. However, this does not guarantee that
2210 * softirq handlers will have completed, since in some kernels, these 2210 * softirq handlers will have completed, since in some kernels, these
2211 * handlers can run in process context, and can block. 2211 * handlers can run in process context, and can block.
2212 * 2212 *
2213 * This primitive provides the guarantees made by the (now removed) 2213 * This primitive provides the guarantees made by the (now removed)
2214 * synchronize_kernel() API. In contrast, synchronize_rcu() only 2214 * synchronize_kernel() API. In contrast, synchronize_rcu() only
2215 * guarantees that rcu_read_lock() sections will have completed. 2215 * guarantees that rcu_read_lock() sections will have completed.
2216 * In "classic RCU", these two guarantees happen to be one and 2216 * In "classic RCU", these two guarantees happen to be one and
2217 * the same, but can differ in realtime RCU implementations. 2217 * the same, but can differ in realtime RCU implementations.
2218 */ 2218 */
2219 void synchronize_sched(void) 2219 void synchronize_sched(void)
2220 { 2220 {
2221 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 2221 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2222 !lock_is_held(&rcu_lock_map) && 2222 !lock_is_held(&rcu_lock_map) &&
2223 !lock_is_held(&rcu_sched_lock_map), 2223 !lock_is_held(&rcu_sched_lock_map),
2224 "Illegal synchronize_sched() in RCU-sched read-side critical section"); 2224 "Illegal synchronize_sched() in RCU-sched read-side critical section");
2225 if (rcu_blocking_is_gp()) 2225 if (rcu_blocking_is_gp())
2226 return; 2226 return;
2227 wait_rcu_gp(call_rcu_sched); 2227 wait_rcu_gp(call_rcu_sched);
2228 } 2228 }
2229 EXPORT_SYMBOL_GPL(synchronize_sched); 2229 EXPORT_SYMBOL_GPL(synchronize_sched);
2230 2230
2231 /** 2231 /**
2232 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. 2232 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
2233 * 2233 *
2234 * Control will return to the caller some time after a full rcu_bh grace 2234 * Control will return to the caller some time after a full rcu_bh grace
2235 * period has elapsed, in other words after all currently executing rcu_bh 2235 * period has elapsed, in other words after all currently executing rcu_bh
2236 * read-side critical sections have completed. RCU read-side critical 2236 * read-side critical sections have completed. RCU read-side critical
2237 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), 2237 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
2238 * and may be nested. 2238 * and may be nested.
2239 */ 2239 */
2240 void synchronize_rcu_bh(void) 2240 void synchronize_rcu_bh(void)
2241 { 2241 {
2242 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 2242 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2243 !lock_is_held(&rcu_lock_map) && 2243 !lock_is_held(&rcu_lock_map) &&
2244 !lock_is_held(&rcu_sched_lock_map), 2244 !lock_is_held(&rcu_sched_lock_map),
2245 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); 2245 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
2246 if (rcu_blocking_is_gp()) 2246 if (rcu_blocking_is_gp())
2247 return; 2247 return;
2248 wait_rcu_gp(call_rcu_bh); 2248 wait_rcu_gp(call_rcu_bh);
2249 } 2249 }
2250 EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 2250 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
2251 2251
2252 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); 2252 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
2253 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); 2253 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
2254 2254
2255 static int synchronize_sched_expedited_cpu_stop(void *data) 2255 static int synchronize_sched_expedited_cpu_stop(void *data)
2256 { 2256 {
2257 /* 2257 /*
2258 * There must be a full memory barrier on each affected CPU 2258 * There must be a full memory barrier on each affected CPU
2259 * between the time that try_stop_cpus() is called and the 2259 * between the time that try_stop_cpus() is called and the
2260 * time that it returns. 2260 * time that it returns.
2261 * 2261 *
2262 * In the current initial implementation of cpu_stop, the 2262 * In the current initial implementation of cpu_stop, the
2263 * above condition is already met when the control reaches 2263 * above condition is already met when the control reaches
2264 * this point and the following smp_mb() is not strictly 2264 * this point and the following smp_mb() is not strictly
2265 * necessary. Do smp_mb() anyway for documentation and 2265 * necessary. Do smp_mb() anyway for documentation and
2266 * robustness against future implementation changes. 2266 * robustness against future implementation changes.
2267 */ 2267 */
2268 smp_mb(); /* See above comment block. */ 2268 smp_mb(); /* See above comment block. */
2269 return 0; 2269 return 0;
2270 } 2270 }
2271 2271
2272 /** 2272 /**
2273 * synchronize_sched_expedited - Brute-force RCU-sched grace period 2273 * synchronize_sched_expedited - Brute-force RCU-sched grace period
2274 * 2274 *
2275 * Wait for an RCU-sched grace period to elapse, but use a "big hammer" 2275 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
2276 * approach to force the grace period to end quickly. This consumes 2276 * approach to force the grace period to end quickly. This consumes
2277 * significant time on all CPUs and is unfriendly to real-time workloads, 2277 * significant time on all CPUs and is unfriendly to real-time workloads,
2278 * and is thus not recommended for any sort of common-case code. In fact, 2278 * and is thus not recommended for any sort of common-case code. In fact,
2279 * if you are using synchronize_sched_expedited() in a loop, please 2279 * if you are using synchronize_sched_expedited() in a loop, please
2280 * restructure your code to batch your updates, and then use a single 2280 * restructure your code to batch your updates, and then use a single
2281 * synchronize_sched() instead. 2281 * synchronize_sched() instead.
2282 * 2282 *
2283 * Note that it is illegal to call this function while holding any lock 2283 * Note that it is illegal to call this function while holding any lock
2284 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 2284 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2285 * to call this function from a CPU-hotplug notifier. Failing to observe 2285 * to call this function from a CPU-hotplug notifier. Failing to observe
2286 * these restrictions will result in deadlock. 2286 * these restrictions will result in deadlock.
2287 * 2287 *
2288 * This implementation can be thought of as an application of ticket 2288 * This implementation can be thought of as an application of ticket
2289 * locking to RCU, with sync_sched_expedited_started and 2289 * locking to RCU, with sync_sched_expedited_started and
2290 * sync_sched_expedited_done taking on the roles of the halves 2290 * sync_sched_expedited_done taking on the roles of the halves
2291 * of the ticket-lock word. Each task atomically increments 2291 * of the ticket-lock word. Each task atomically increments
2292 * sync_sched_expedited_started upon entry, snapshotting the old value, 2292 * sync_sched_expedited_started upon entry, snapshotting the old value,
2293 * then attempts to stop all the CPUs. If this succeeds, then each 2293 * then attempts to stop all the CPUs. If this succeeds, then each
2294 * CPU will have executed a context switch, resulting in an RCU-sched 2294 * CPU will have executed a context switch, resulting in an RCU-sched
2295 * grace period. We are then done, so we use atomic_cmpxchg() to 2295 * grace period. We are then done, so we use atomic_cmpxchg() to
2296 * update sync_sched_expedited_done to match our snapshot -- but 2296 * update sync_sched_expedited_done to match our snapshot -- but
2297 * only if someone else has not already advanced past our snapshot. 2297 * only if someone else has not already advanced past our snapshot.
2298 * 2298 *
2299 * On the other hand, if try_stop_cpus() fails, we check the value 2299 * On the other hand, if try_stop_cpus() fails, we check the value
2300 * of sync_sched_expedited_done. If it has advanced past our 2300 * of sync_sched_expedited_done. If it has advanced past our
2301 * initial snapshot, then someone else must have forced a grace period 2301 * initial snapshot, then someone else must have forced a grace period
2302 * some time after we took our snapshot. In this case, our work is 2302 * some time after we took our snapshot. In this case, our work is
2303 * done for us, and we can simply return. Otherwise, we try again, 2303 * done for us, and we can simply return. Otherwise, we try again,
2304 * but keep our initial snapshot for purposes of checking for someone 2304 * but keep our initial snapshot for purposes of checking for someone
2305 * doing our work for us. 2305 * doing our work for us.
2306 * 2306 *
2307 * If we fail too many times in a row, we fall back to synchronize_sched(). 2307 * If we fail too many times in a row, we fall back to synchronize_sched().
2308 */ 2308 */
2309 void synchronize_sched_expedited(void) 2309 void synchronize_sched_expedited(void)
2310 { 2310 {
2311 int firstsnap, s, snap, trycount = 0; 2311 int firstsnap, s, snap, trycount = 0;
2312 2312
2313 /* Note that atomic_inc_return() implies full memory barrier. */ 2313 /* Note that atomic_inc_return() implies full memory barrier. */
2314 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); 2314 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
2315 get_online_cpus(); 2315 get_online_cpus();
2316 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); 2316 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2317 2317
2318 /* 2318 /*
2319 * Each pass through the following loop attempts to force a 2319 * Each pass through the following loop attempts to force a
2320 * context switch on each CPU. 2320 * context switch on each CPU.
2321 */ 2321 */
2322 while (try_stop_cpus(cpu_online_mask, 2322 while (try_stop_cpus(cpu_online_mask,
2323 synchronize_sched_expedited_cpu_stop, 2323 synchronize_sched_expedited_cpu_stop,
2324 NULL) == -EAGAIN) { 2324 NULL) == -EAGAIN) {
2325 put_online_cpus(); 2325 put_online_cpus();
2326 2326
2327 /* No joy, try again later. Or just synchronize_sched(). */ 2327 /* No joy, try again later. Or just synchronize_sched(). */
2328 if (trycount++ < 10) { 2328 if (trycount++ < 10) {
2329 udelay(trycount * num_online_cpus()); 2329 udelay(trycount * num_online_cpus());
2330 } else { 2330 } else {
2331 synchronize_sched(); 2331 synchronize_sched();
2332 return; 2332 return;
2333 } 2333 }
2334 2334
2335 /* Check to see if someone else did our work for us. */ 2335 /* Check to see if someone else did our work for us. */
2336 s = atomic_read(&sync_sched_expedited_done); 2336 s = atomic_read(&sync_sched_expedited_done);
2337 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { 2337 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
2338 smp_mb(); /* ensure test happens before caller kfree */ 2338 smp_mb(); /* ensure test happens before caller kfree */
2339 return; 2339 return;
2340 } 2340 }
2341 2341
2342 /* 2342 /*
2343 * Refetching sync_sched_expedited_started allows later 2343 * Refetching sync_sched_expedited_started allows later
2344 * callers to piggyback on our grace period. We subtract 2344 * callers to piggyback on our grace period. We subtract
2345 * 1 to get the same token that the last incrementer got. 2345 * 1 to get the same token that the last incrementer got.
2346 * We retry after they started, so our grace period works 2346 * We retry after they started, so our grace period works
2347 * for them, and they started after our first try, so their 2347 * for them, and they started after our first try, so their
2348 * grace period works for us. 2348 * grace period works for us.
2349 */ 2349 */
2350 get_online_cpus(); 2350 get_online_cpus();
2351 snap = atomic_read(&sync_sched_expedited_started); 2351 snap = atomic_read(&sync_sched_expedited_started);
2352 smp_mb(); /* ensure read is before try_stop_cpus(). */ 2352 smp_mb(); /* ensure read is before try_stop_cpus(). */
2353 } 2353 }
2354 2354
2355 /* 2355 /*
2356 * Everyone up to our most recent fetch is covered by our grace 2356 * Everyone up to our most recent fetch is covered by our grace
2357 * period. Update the counter, but only if our work is still 2357 * period. Update the counter, but only if our work is still
2358 * relevant -- which it won't be if someone who started later 2358 * relevant -- which it won't be if someone who started later
2359 * than we did beat us to the punch. 2359 * than we did beat us to the punch.
2360 */ 2360 */
2361 do { 2361 do {
2362 s = atomic_read(&sync_sched_expedited_done); 2362 s = atomic_read(&sync_sched_expedited_done);
2363 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { 2363 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
2364 smp_mb(); /* ensure test happens before caller kfree */ 2364 smp_mb(); /* ensure test happens before caller kfree */
2365 break; 2365 break;
2366 } 2366 }
2367 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); 2367 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
2368 2368
2369 put_online_cpus(); 2369 put_online_cpus();
2370 } 2370 }
2371 EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 2371 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
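The block comment before synchronize_sched_expedited() describes a ticket-style protocol: snapshot a "started" counter on entry, do the expensive work, then advance a "done" counter to the snapshot with a compare-and-swap unless someone has already pushed it further, in which case their grace period covers ours. Here is a compact C11-atomics sketch of just that counter protocol, with the CPU-stopping work replaced by a stub and the retry/fallback logic folded into a single up-front check; it is an illustration under those simplifying assumptions, not the kernel routine.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int started = 0;
static atomic_int done = 0;

/* Stand-in for the expensive operation (stopping all CPUs, etc.). */
static void do_expensive_work(void)
{
}

static void expedited(void)
{
        int snap = atomic_fetch_add(&started, 1) + 1;   /* take a ticket */
        int s;

        /* Did someone already cover our ticket?  (The kernel checks this on its retry path.) */
        s = atomic_load(&done);
        if (s - snap >= 0)
                return;                 /* their work serves as ours */

        do_expensive_work();

        /* Publish completion up to our ticket, unless others got further. */
        do {
                s = atomic_load(&done);
                if (s - snap >= 0)
                        break;          /* a later caller already covered us */
        } while (!atomic_compare_exchange_weak(&done, &s, snap));
}

int main(void)
{
        expedited();
        expedited();
        printf("started=%d done=%d\n", atomic_load(&started),
               atomic_load(&done));
        return 0;
}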
2372 2372
2373 /* 2373 /*
2374 * Check to see if there is any immediate RCU-related work to be done 2374 * Check to see if there is any immediate RCU-related work to be done
2375 * by the current CPU, for the specified type of RCU, returning 1 if so. 2375 * by the current CPU, for the specified type of RCU, returning 1 if so.
2376 * The checks are in order of increasing expense: checks that can be 2376 * The checks are in order of increasing expense: checks that can be
2377 * carried out against CPU-local state are performed first. However, 2377 * carried out against CPU-local state are performed first. However,
2378 * we must check for CPU stalls first, else we might not get a chance. 2378 * we must check for CPU stalls first, else we might not get a chance.
2379 */ 2379 */
2380 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 2380 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
2381 { 2381 {
2382 struct rcu_node *rnp = rdp->mynode; 2382 struct rcu_node *rnp = rdp->mynode;
2383 2383
2384 rdp->n_rcu_pending++; 2384 rdp->n_rcu_pending++;
2385 2385
2386 /* Check for CPU stalls, if enabled. */ 2386 /* Check for CPU stalls, if enabled. */
2387 check_cpu_stall(rsp, rdp); 2387 check_cpu_stall(rsp, rdp);
2388 2388
2389 /* Is the RCU core waiting for a quiescent state from this CPU? */ 2389 /* Is the RCU core waiting for a quiescent state from this CPU? */
2390 if (rcu_scheduler_fully_active && 2390 if (rcu_scheduler_fully_active &&
2391 rdp->qs_pending && !rdp->passed_quiesce) { 2391 rdp->qs_pending && !rdp->passed_quiesce) {
2392 rdp->n_rp_qs_pending++; 2392 rdp->n_rp_qs_pending++;
2393 } else if (rdp->qs_pending && rdp->passed_quiesce) { 2393 } else if (rdp->qs_pending && rdp->passed_quiesce) {
2394 rdp->n_rp_report_qs++; 2394 rdp->n_rp_report_qs++;
2395 return 1; 2395 return 1;
2396 } 2396 }
2397 2397
2398 /* Does this CPU have callbacks ready to invoke? */ 2398 /* Does this CPU have callbacks ready to invoke? */
2399 if (cpu_has_callbacks_ready_to_invoke(rdp)) { 2399 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
2400 rdp->n_rp_cb_ready++; 2400 rdp->n_rp_cb_ready++;
2401 return 1; 2401 return 1;
2402 } 2402 }
2403 2403
2404 /* Has RCU gone idle with this CPU needing another grace period? */ 2404 /* Has RCU gone idle with this CPU needing another grace period? */
2405 if (cpu_needs_another_gp(rsp, rdp)) { 2405 if (cpu_needs_another_gp(rsp, rdp)) {
2406 rdp->n_rp_cpu_needs_gp++; 2406 rdp->n_rp_cpu_needs_gp++;
2407 return 1; 2407 return 1;
2408 } 2408 }
2409 2409
2410 /* Has another RCU grace period completed? */ 2410 /* Has another RCU grace period completed? */
2411 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 2411 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
2412 rdp->n_rp_gp_completed++; 2412 rdp->n_rp_gp_completed++;
2413 return 1; 2413 return 1;
2414 } 2414 }
2415 2415
2416 /* Has a new RCU grace period started? */ 2416 /* Has a new RCU grace period started? */
2417 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 2417 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
2418 rdp->n_rp_gp_started++; 2418 rdp->n_rp_gp_started++;
2419 return 1; 2419 return 1;
2420 } 2420 }
2421 2421
2422 /* nothing to do */ 2422 /* nothing to do */
2423 rdp->n_rp_need_nothing++; 2423 rdp->n_rp_need_nothing++;
2424 return 0; 2424 return 0;
2425 } 2425 }
2426 2426
2427 /* 2427 /*
2428 * Check to see if there is any immediate RCU-related work to be done 2428 * Check to see if there is any immediate RCU-related work to be done
2429 * by the current CPU, returning 1 if so. This function is part of the 2429 * by the current CPU, returning 1 if so. This function is part of the
2430 * RCU implementation; it is -not- an exported member of the RCU API. 2430 * RCU implementation; it is -not- an exported member of the RCU API.
2431 */ 2431 */
2432 static int rcu_pending(int cpu) 2432 static int rcu_pending(int cpu)
2433 { 2433 {
2434 struct rcu_state *rsp; 2434 struct rcu_state *rsp;
2435 2435
2436 for_each_rcu_flavor(rsp) 2436 for_each_rcu_flavor(rsp)
2437 if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) 2437 if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
2438 return 1; 2438 return 1;
2439 return 0; 2439 return 0;
2440 } 2440 }
2441 2441
2442 /* 2442 /*
2443 * Check to see if any future RCU-related work will need to be done 2443 * Check to see if any future RCU-related work will need to be done
2444 * by the current CPU, even if none need be done immediately, returning 2444 * by the current CPU, even if none need be done immediately, returning
2445 * 1 if so. 2445 * 1 if so.
2446 */ 2446 */
2447 static int rcu_cpu_has_callbacks(int cpu) 2447 static int rcu_cpu_has_callbacks(int cpu)
2448 { 2448 {
2449 struct rcu_state *rsp; 2449 struct rcu_state *rsp;
2450 2450
2451 /* RCU callbacks either ready or pending? */ 2451 /* RCU callbacks either ready or pending? */
2452 for_each_rcu_flavor(rsp) 2452 for_each_rcu_flavor(rsp)
2453 if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) 2453 if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
2454 return 1; 2454 return 1;
2455 return 0; 2455 return 0;
2456 } 2456 }
2457 2457
2458 /* 2458 /*
2459 * Helper function for _rcu_barrier() tracing. If tracing is disabled, 2459 * Helper function for _rcu_barrier() tracing. If tracing is disabled,
2460 * the compiler is expected to optimize this away. 2460 * the compiler is expected to optimize this away.
2461 */ 2461 */
2462 static void _rcu_barrier_trace(struct rcu_state *rsp, char *s, 2462 static void _rcu_barrier_trace(struct rcu_state *rsp, char *s,
2463 int cpu, unsigned long done) 2463 int cpu, unsigned long done)
2464 { 2464 {
2465 trace_rcu_barrier(rsp->name, s, cpu, 2465 trace_rcu_barrier(rsp->name, s, cpu,
2466 atomic_read(&rsp->barrier_cpu_count), done); 2466 atomic_read(&rsp->barrier_cpu_count), done);
2467 } 2467 }
2468 2468
2469 /* 2469 /*
2470 * RCU callback function for _rcu_barrier(). If we are last, wake 2470 * RCU callback function for _rcu_barrier(). If we are last, wake
2471 * up the task executing _rcu_barrier(). 2471 * up the task executing _rcu_barrier().
2472 */ 2472 */
2473 static void rcu_barrier_callback(struct rcu_head *rhp) 2473 static void rcu_barrier_callback(struct rcu_head *rhp)
2474 { 2474 {
2475 struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); 2475 struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
2476 struct rcu_state *rsp = rdp->rsp; 2476 struct rcu_state *rsp = rdp->rsp;
2477 2477
2478 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { 2478 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
2479 _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done); 2479 _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
2480 complete(&rsp->barrier_completion); 2480 complete(&rsp->barrier_completion);
2481 } else { 2481 } else {
2482 _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done); 2482 _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
2483 } 2483 }
2484 } 2484 }
2485 2485
2486 /* 2486 /*
2487 * Called with preemption disabled, and from cross-cpu IRQ context. 2487 * Called with preemption disabled, and from cross-cpu IRQ context.
2488 */ 2488 */
2489 static void rcu_barrier_func(void *type) 2489 static void rcu_barrier_func(void *type)
2490 { 2490 {
2491 struct rcu_state *rsp = type; 2491 struct rcu_state *rsp = type;
2492 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda); 2492 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2493 2493
2494 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done); 2494 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2495 atomic_inc(&rsp->barrier_cpu_count); 2495 atomic_inc(&rsp->barrier_cpu_count);
2496 rsp->call(&rdp->barrier_head, rcu_barrier_callback); 2496 rsp->call(&rdp->barrier_head, rcu_barrier_callback);
2497 } 2497 }
2498 2498
2499 /* 2499 /*
2500 * Orchestrate the specified type of RCU barrier, waiting for all 2500 * Orchestrate the specified type of RCU barrier, waiting for all
2501 * RCU callbacks of the specified type to complete. 2501 * RCU callbacks of the specified type to complete.
2502 */ 2502 */
2503 static void _rcu_barrier(struct rcu_state *rsp) 2503 static void _rcu_barrier(struct rcu_state *rsp)
2504 { 2504 {
2505 int cpu; 2505 int cpu;
2506 struct rcu_data *rdp; 2506 struct rcu_data *rdp;
2507 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done); 2507 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
2508 unsigned long snap_done; 2508 unsigned long snap_done;
2509 2509
2510 _rcu_barrier_trace(rsp, "Begin", -1, snap); 2510 _rcu_barrier_trace(rsp, "Begin", -1, snap);
2511 2511
2512 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 2512 /* Take mutex to serialize concurrent rcu_barrier() requests. */
2513 mutex_lock(&rsp->barrier_mutex); 2513 mutex_lock(&rsp->barrier_mutex);
2514 2514
2515 /* 2515 /*
2516 * Ensure that all prior references, including to ->n_barrier_done, 2516 * Ensure that all prior references, including to ->n_barrier_done,
2517 * are ordered before the _rcu_barrier() machinery. 2517 * are ordered before the _rcu_barrier() machinery.
2518 */ 2518 */
2519 smp_mb(); /* See above block comment. */ 2519 smp_mb(); /* See above block comment. */
2520 2520
2521 /* 2521 /*
2522 * Recheck ->n_barrier_done to see if others did our work for us. 2522 * Recheck ->n_barrier_done to see if others did our work for us.
2523 * This means checking ->n_barrier_done for an even-to-odd-to-even 2523 * This means checking ->n_barrier_done for an even-to-odd-to-even
2524 * transition. The "if" expression below therefore rounds the old 2524 * transition. The "if" expression below therefore rounds the old
2525 * value up to the next even number and adds two before comparing. 2525 * value up to the next even number and adds two before comparing.
2526 */ 2526 */
2527 snap_done = ACCESS_ONCE(rsp->n_barrier_done); 2527 snap_done = ACCESS_ONCE(rsp->n_barrier_done);
2528 _rcu_barrier_trace(rsp, "Check", -1, snap_done); 2528 _rcu_barrier_trace(rsp, "Check", -1, snap_done);
2529 if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) { 2529 if (ULONG_CMP_GE(snap_done, ((snap + 1) & ~0x1) + 2)) {
2530 _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done); 2530 _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
2531 smp_mb(); /* caller's subsequent code after above check. */ 2531 smp_mb(); /* caller's subsequent code after above check. */
2532 mutex_unlock(&rsp->barrier_mutex); 2532 mutex_unlock(&rsp->barrier_mutex);
2533 return; 2533 return;
2534 } 2534 }
2535 2535
2536 /* 2536 /*
2537 * Increment ->n_barrier_done to avoid duplicate work. Use 2537 * Increment ->n_barrier_done to avoid duplicate work. Use
2538 * ACCESS_ONCE() to prevent the compiler from speculating 2538 * ACCESS_ONCE() to prevent the compiler from speculating
2539 * the increment to precede the early-exit check. 2539 * the increment to precede the early-exit check.
2540 */ 2540 */
2541 ACCESS_ONCE(rsp->n_barrier_done)++; 2541 ACCESS_ONCE(rsp->n_barrier_done)++;
2542 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1); 2542 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
2543 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done); 2543 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
2544 smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */ 2544 smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
2545 2545
2546 /* 2546 /*
2547 * Initialize the count to one rather than to zero in order to 2547 * Initialize the count to one rather than to zero in order to
2548 * avoid a too-soon return to zero in case of a short grace period 2548 * avoid a too-soon return to zero in case of a short grace period
2549 * (or preemption of this task). Exclude CPU-hotplug operations 2549 * (or preemption of this task). Exclude CPU-hotplug operations
2550 * to ensure that no offline CPU has callbacks queued. 2550 * to ensure that no offline CPU has callbacks queued.
2551 */ 2551 */
2552 init_completion(&rsp->barrier_completion); 2552 init_completion(&rsp->barrier_completion);
2553 atomic_set(&rsp->barrier_cpu_count, 1); 2553 atomic_set(&rsp->barrier_cpu_count, 1);
2554 get_online_cpus(); 2554 get_online_cpus();
2555 2555
2556 /* 2556 /*
2557 * Force each CPU with callbacks to register a new callback. 2557 * Force each CPU with callbacks to register a new callback.
2558 * When that callback is invoked, we will know that all of the 2558 * When that callback is invoked, we will know that all of the
2559 * corresponding CPU's preceding callbacks have been invoked. 2559 * corresponding CPU's preceding callbacks have been invoked.
2560 */ 2560 */
2561 for_each_online_cpu(cpu) { 2561 for_each_online_cpu(cpu) {
2562 rdp = per_cpu_ptr(rsp->rda, cpu); 2562 rdp = per_cpu_ptr(rsp->rda, cpu);
2563 if (ACCESS_ONCE(rdp->qlen)) { 2563 if (ACCESS_ONCE(rdp->qlen)) {
2564 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 2564 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
2565 rsp->n_barrier_done); 2565 rsp->n_barrier_done);
2566 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 2566 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
2567 } else { 2567 } else {
2568 _rcu_barrier_trace(rsp, "OnlineNQ", cpu, 2568 _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
2569 rsp->n_barrier_done); 2569 rsp->n_barrier_done);
2570 } 2570 }
2571 } 2571 }
2572 put_online_cpus(); 2572 put_online_cpus();
2573 2573
2574 /* 2574 /*
2575 * Now that we have an rcu_barrier_callback() callback on each 2575 * Now that we have an rcu_barrier_callback() callback on each
2576 * CPU, and thus have counted each CPU, remove the initial count. 2576 * CPU, and thus have counted each CPU, remove the initial count.
2577 */ 2577 */
2578 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) 2578 if (atomic_dec_and_test(&rsp->barrier_cpu_count))
2579 complete(&rsp->barrier_completion); 2579 complete(&rsp->barrier_completion);
2580 2580
2581 /* Increment ->n_barrier_done to prevent duplicate work. */ 2581 /* Increment ->n_barrier_done to prevent duplicate work. */
2582 smp_mb(); /* Keep increment after above mechanism. */ 2582 smp_mb(); /* Keep increment after above mechanism. */
2583 ACCESS_ONCE(rsp->n_barrier_done)++; 2583 ACCESS_ONCE(rsp->n_barrier_done)++;
2584 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0); 2584 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
2585 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done); 2585 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
2586 smp_mb(); /* Keep increment before caller's subsequent code. */ 2586 smp_mb(); /* Keep increment before caller's subsequent code. */
2587 2587
2588 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ 2588 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
2589 wait_for_completion(&rsp->barrier_completion); 2589 wait_for_completion(&rsp->barrier_completion);
2590 2590
2591 /* Other rcu_barrier() invocations can now safely proceed. */ 2591 /* Other rcu_barrier() invocations can now safely proceed. */
2592 mutex_unlock(&rsp->barrier_mutex); 2592 mutex_unlock(&rsp->barrier_mutex);
2593 } 2593 }
2594 2594
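As a worked illustration of the even/odd ->n_barrier_done handshake in _rcu_barrier() above (the numbers are hypothetical): suppose a caller snapshots snap == 4, an even value meaning no barrier was in flight at snapshot time. The early-exit test then requires snap_done >= ((4 + 1) & ~0x1) + 2 == 6, that is, one complete even-to-odd-to-even transition (4 -> 5 -> 6) after the snapshot; such a barrier necessarily started after this caller's callbacks were queued, so it covers them and the caller may return early. If instead snap == 5 (odd, a barrier already in flight), the threshold becomes 8: the in-flight barrier, which ends at 6, began before the snapshot and might have missed this caller's callbacks, so a further complete barrier (ending at 8) is needed before skipping the work is safe.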
2595 /** 2595 /**
2596 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 2596 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
2597 */ 2597 */
2598 void rcu_barrier_bh(void) 2598 void rcu_barrier_bh(void)
2599 { 2599 {
2600 _rcu_barrier(&rcu_bh_state); 2600 _rcu_barrier(&rcu_bh_state);
2601 } 2601 }
2602 EXPORT_SYMBOL_GPL(rcu_barrier_bh); 2602 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
2603 2603
2604 /** 2604 /**
2605 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 2605 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
2606 */ 2606 */
2607 void rcu_barrier_sched(void) 2607 void rcu_barrier_sched(void)
2608 { 2608 {
2609 _rcu_barrier(&rcu_sched_state); 2609 _rcu_barrier(&rcu_sched_state);
2610 } 2610 }
2611 EXPORT_SYMBOL_GPL(rcu_barrier_sched); 2611 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
2612 2612
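A minimal sketch of the classic pattern these barrier interfaces support, shown with the generic call_rcu()/rcu_barrier() pair: a module that queues RCU callbacks must wait for them before its text and data disappear. The names struct foo, foo_reclaim() and foo_exit() are hypothetical and not part of this commit.

	#include <linux/module.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct rcu_head rh;
	};

	/* Invoked after a grace period; frees the element removed earlier. */
	static void foo_reclaim(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct foo, rh));
	}

	/* Update side would do: call_rcu(&fp->rh, foo_reclaim); */

	static void __exit foo_exit(void)
	{
		/*
		 * Wait until every previously queued foo_reclaim() has run;
		 * otherwise a callback could fire after this module is gone.
		 */
		rcu_barrier();
	}
	module_exit(foo_exit);

Code using call_rcu_bh() or call_rcu_sched() would pair with rcu_barrier_bh() or rcu_barrier_sched() above in the same way.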
2613 /* 2613 /*
2614 * Do boot-time initialization of a CPU's per-CPU RCU data. 2614 * Do boot-time initialization of a CPU's per-CPU RCU data.
2615 */ 2615 */
2616 static void __init 2616 static void __init
2617 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 2617 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2618 { 2618 {
2619 unsigned long flags; 2619 unsigned long flags;
2620 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2620 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2621 struct rcu_node *rnp = rcu_get_root(rsp); 2621 struct rcu_node *rnp = rcu_get_root(rsp);
2622 2622
2623 /* Set up local state, ensuring consistent view of global state. */ 2623 /* Set up local state, ensuring consistent view of global state. */
2624 raw_spin_lock_irqsave(&rnp->lock, flags); 2624 raw_spin_lock_irqsave(&rnp->lock, flags);
2625 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 2625 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
2626 init_callback_list(rdp); 2626 init_callback_list(rdp);
2627 rdp->qlen_lazy = 0; 2627 rdp->qlen_lazy = 0;
2628 ACCESS_ONCE(rdp->qlen) = 0; 2628 ACCESS_ONCE(rdp->qlen) = 0;
2629 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2629 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2630 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); 2630 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2631 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2631 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2632 #ifdef CONFIG_RCU_USER_QS 2632 #ifdef CONFIG_RCU_USER_QS
2633 WARN_ON_ONCE(rdp->dynticks->in_user); 2633 WARN_ON_ONCE(rdp->dynticks->in_user);
2634 #endif 2634 #endif
2635 rdp->cpu = cpu; 2635 rdp->cpu = cpu;
2636 rdp->rsp = rsp; 2636 rdp->rsp = rsp;
2637 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2637 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2638 } 2638 }
2639 2639
2640 /* 2640 /*
2641 * Initialize a CPU's per-CPU RCU data. Note that only one online or 2641 * Initialize a CPU's per-CPU RCU data. Note that only one online or
2642 * offline event can be happening at a given time. Note also that we 2642 * offline event can be happening at a given time. Note also that we
2643 * can accept some slop in the rsp->completed access due to the fact 2643 * can accept some slop in the rsp->completed access due to the fact
2644 * that this CPU cannot possibly have any RCU callbacks in flight yet. 2644 * that this CPU cannot possibly have any RCU callbacks in flight yet.
2645 */ 2645 */
2646 static void __cpuinit 2646 static void __cpuinit
2647 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 2647 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2648 { 2648 {
2649 unsigned long flags; 2649 unsigned long flags;
2650 unsigned long mask; 2650 unsigned long mask;
2651 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2651 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2652 struct rcu_node *rnp = rcu_get_root(rsp); 2652 struct rcu_node *rnp = rcu_get_root(rsp);
2653 2653
2654 /* Exclude new grace periods. */ 2654 /* Exclude new grace periods. */
2655 mutex_lock(&rsp->onoff_mutex); 2655 mutex_lock(&rsp->onoff_mutex);
2656 2656
2657 /* Set up local state, ensuring consistent view of global state. */ 2657 /* Set up local state, ensuring consistent view of global state. */
2658 raw_spin_lock_irqsave(&rnp->lock, flags); 2658 raw_spin_lock_irqsave(&rnp->lock, flags);
2659 rdp->beenonline = 1; /* We have now been online. */ 2659 rdp->beenonline = 1; /* We have now been online. */
2660 rdp->preemptible = preemptible; 2660 rdp->preemptible = preemptible;
2661 rdp->qlen_last_fqs_check = 0; 2661 rdp->qlen_last_fqs_check = 0;
2662 rdp->n_force_qs_snap = rsp->n_force_qs; 2662 rdp->n_force_qs_snap = rsp->n_force_qs;
2663 rdp->blimit = blimit; 2663 rdp->blimit = blimit;
2664 init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ 2664 init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
2665 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 2665 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
2666 atomic_set(&rdp->dynticks->dynticks, 2666 atomic_set(&rdp->dynticks->dynticks,
2667 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 2667 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2668 rcu_prepare_for_idle_init(cpu); 2668 rcu_prepare_for_idle_init(cpu);
2669 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2669 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2670 2670
2671 /* Add CPU to rcu_node bitmasks. */ 2671 /* Add CPU to rcu_node bitmasks. */
2672 rnp = rdp->mynode; 2672 rnp = rdp->mynode;
2673 mask = rdp->grpmask; 2673 mask = rdp->grpmask;
2674 do { 2674 do {
2675 /* Exclude any attempts to start a new GP on small systems. */ 2675 /* Exclude any attempts to start a new GP on small systems. */
2676 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2676 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2677 rnp->qsmaskinit |= mask; 2677 rnp->qsmaskinit |= mask;
2678 mask = rnp->grpmask; 2678 mask = rnp->grpmask;
2679 if (rnp == rdp->mynode) { 2679 if (rnp == rdp->mynode) {
2680 /* 2680 /*
2681 * If there is a grace period in progress, we will 2681 * If there is a grace period in progress, we will
2682 * set up to wait for it next time we run the 2682 * set up to wait for it next time we run the
2683 * RCU core code. 2683 * RCU core code.
2684 */ 2684 */
2685 rdp->gpnum = rnp->completed; 2685 rdp->gpnum = rnp->completed;
2686 rdp->completed = rnp->completed; 2686 rdp->completed = rnp->completed;
2687 rdp->passed_quiesce = 0; 2687 rdp->passed_quiesce = 0;
2688 rdp->qs_pending = 0; 2688 rdp->qs_pending = 0;
2689 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); 2689 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
2690 } 2690 }
2691 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 2691 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
2692 rnp = rnp->parent; 2692 rnp = rnp->parent;
2693 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 2693 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
2694 local_irq_restore(flags); 2694 local_irq_restore(flags);
2695 2695
2696 mutex_unlock(&rsp->onoff_mutex); 2696 mutex_unlock(&rsp->onoff_mutex);
2697 } 2697 }
2698 2698
2699 static void __cpuinit rcu_prepare_cpu(int cpu) 2699 static void __cpuinit rcu_prepare_cpu(int cpu)
2700 { 2700 {
2701 struct rcu_state *rsp; 2701 struct rcu_state *rsp;
2702 2702
2703 for_each_rcu_flavor(rsp) 2703 for_each_rcu_flavor(rsp)
2704 rcu_init_percpu_data(cpu, rsp, 2704 rcu_init_percpu_data(cpu, rsp,
2705 strcmp(rsp->name, "rcu_preempt") == 0); 2705 strcmp(rsp->name, "rcu_preempt") == 0);
2706 } 2706 }
2707 2707
2708 /* 2708 /*
2709 * Handle CPU online/offline notification events. 2709 * Handle CPU online/offline notification events.
2710 */ 2710 */
2711 static int __cpuinit rcu_cpu_notify(struct notifier_block *self, 2711 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2712 unsigned long action, void *hcpu) 2712 unsigned long action, void *hcpu)
2713 { 2713 {
2714 long cpu = (long)hcpu; 2714 long cpu = (long)hcpu;
2715 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 2715 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2716 struct rcu_node *rnp = rdp->mynode; 2716 struct rcu_node *rnp = rdp->mynode;
2717 struct rcu_state *rsp; 2717 struct rcu_state *rsp;
2718 2718
2719 trace_rcu_utilization("Start CPU hotplug"); 2719 trace_rcu_utilization("Start CPU hotplug");
2720 switch (action) { 2720 switch (action) {
2721 case CPU_UP_PREPARE: 2721 case CPU_UP_PREPARE:
2722 case CPU_UP_PREPARE_FROZEN: 2722 case CPU_UP_PREPARE_FROZEN:
2723 rcu_prepare_cpu(cpu); 2723 rcu_prepare_cpu(cpu);
2724 rcu_prepare_kthreads(cpu); 2724 rcu_prepare_kthreads(cpu);
2725 break; 2725 break;
2726 case CPU_ONLINE: 2726 case CPU_ONLINE:
2727 case CPU_DOWN_FAILED: 2727 case CPU_DOWN_FAILED:
2728 rcu_boost_kthread_setaffinity(rnp, -1); 2728 rcu_boost_kthread_setaffinity(rnp, -1);
2729 break; 2729 break;
2730 case CPU_DOWN_PREPARE: 2730 case CPU_DOWN_PREPARE:
2731 rcu_boost_kthread_setaffinity(rnp, cpu); 2731 rcu_boost_kthread_setaffinity(rnp, cpu);
2732 break; 2732 break;
2733 case CPU_DYING: 2733 case CPU_DYING:
2734 case CPU_DYING_FROZEN: 2734 case CPU_DYING_FROZEN:
2735 /* 2735 /*
2736 * The whole machine is "stopped" except this CPU, so we can 2736 * The whole machine is "stopped" except this CPU, so we can
2737 * touch any data without introducing corruption. We send the 2737 * touch any data without introducing corruption. We send the
2738 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2738 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2739 */ 2739 */
2740 for_each_rcu_flavor(rsp) 2740 for_each_rcu_flavor(rsp)
2741 rcu_cleanup_dying_cpu(rsp); 2741 rcu_cleanup_dying_cpu(rsp);
2742 rcu_cleanup_after_idle(cpu); 2742 rcu_cleanup_after_idle(cpu);
2743 break; 2743 break;
2744 case CPU_DEAD: 2744 case CPU_DEAD:
2745 case CPU_DEAD_FROZEN: 2745 case CPU_DEAD_FROZEN:
2746 case CPU_UP_CANCELED: 2746 case CPU_UP_CANCELED:
2747 case CPU_UP_CANCELED_FROZEN: 2747 case CPU_UP_CANCELED_FROZEN:
2748 for_each_rcu_flavor(rsp) 2748 for_each_rcu_flavor(rsp)
2749 rcu_cleanup_dead_cpu(cpu, rsp); 2749 rcu_cleanup_dead_cpu(cpu, rsp);
2750 break; 2750 break;
2751 default: 2751 default:
2752 break; 2752 break;
2753 } 2753 }
2754 trace_rcu_utilization("End CPU hotplug"); 2754 trace_rcu_utilization("End CPU hotplug");
2755 return NOTIFY_OK; 2755 return NOTIFY_OK;
2756 } 2756 }
2757 2757
2758 /* 2758 /*
2759 * Spawn the kthread that handles this RCU flavor's grace periods. 2759 * Spawn the kthread that handles this RCU flavor's grace periods.
2760 */ 2760 */
2761 static int __init rcu_spawn_gp_kthread(void) 2761 static int __init rcu_spawn_gp_kthread(void)
2762 { 2762 {
2763 unsigned long flags; 2763 unsigned long flags;
2764 struct rcu_node *rnp; 2764 struct rcu_node *rnp;
2765 struct rcu_state *rsp; 2765 struct rcu_state *rsp;
2766 struct task_struct *t; 2766 struct task_struct *t;
2767 2767
2768 for_each_rcu_flavor(rsp) { 2768 for_each_rcu_flavor(rsp) {
2769 t = kthread_run(rcu_gp_kthread, rsp, rsp->name); 2769 t = kthread_run(rcu_gp_kthread, rsp, rsp->name);
2770 BUG_ON(IS_ERR(t)); 2770 BUG_ON(IS_ERR(t));
2771 rnp = rcu_get_root(rsp); 2771 rnp = rcu_get_root(rsp);
2772 raw_spin_lock_irqsave(&rnp->lock, flags); 2772 raw_spin_lock_irqsave(&rnp->lock, flags);
2773 rsp->gp_kthread = t; 2773 rsp->gp_kthread = t;
2774 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2774 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2775 } 2775 }
2776 return 0; 2776 return 0;
2777 } 2777 }
2778 early_initcall(rcu_spawn_gp_kthread); 2778 early_initcall(rcu_spawn_gp_kthread);
2779 2779
2780 /* 2780 /*
2781 * This function is invoked towards the end of the scheduler's initialization 2781 * This function is invoked towards the end of the scheduler's initialization
2782 * process. Before this is called, the idle task might contain 2782 * process. Before this is called, the idle task might contain
2783 * RCU read-side critical sections (during which time, this idle 2783 * RCU read-side critical sections (during which time, this idle
2784 * task is booting the system). After this function is called, the 2784 * task is booting the system). After this function is called, the
2785 * idle tasks are prohibited from containing RCU read-side critical 2785 * idle tasks are prohibited from containing RCU read-side critical
2786 * sections. This function also enables RCU lockdep checking. 2786 * sections. This function also enables RCU lockdep checking.
2787 */ 2787 */
2788 void rcu_scheduler_starting(void) 2788 void rcu_scheduler_starting(void)
2789 { 2789 {
2790 WARN_ON(num_online_cpus() != 1); 2790 WARN_ON(num_online_cpus() != 1);
2791 WARN_ON(nr_context_switches() > 0); 2791 WARN_ON(nr_context_switches() > 0);
2792 rcu_scheduler_active = 1; 2792 rcu_scheduler_active = 1;
2793 } 2793 }
2794 2794
2795 /* 2795 /*
2796 * Compute the per-level fanout, either using the exact fanout specified 2796 * Compute the per-level fanout, either using the exact fanout specified
2797 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 2797 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
2798 */ 2798 */
2799 #ifdef CONFIG_RCU_FANOUT_EXACT 2799 #ifdef CONFIG_RCU_FANOUT_EXACT
2800 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2800 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2801 { 2801 {
2802 int i; 2802 int i;
2803 2803
2804 for (i = rcu_num_lvls - 1; i > 0; i--) 2804 for (i = rcu_num_lvls - 1; i > 0; i--)
2805 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2805 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2806 rsp->levelspread[0] = rcu_fanout_leaf; 2806 rsp->levelspread[0] = rcu_fanout_leaf;
2807 } 2807 }
2808 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2808 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2809 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2809 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2810 { 2810 {
2811 int ccur; 2811 int ccur;
2812 int cprv; 2812 int cprv;
2813 int i; 2813 int i;
2814 2814
2815 cprv = nr_cpu_ids; 2815 cprv = nr_cpu_ids;
2816 for (i = rcu_num_lvls - 1; i >= 0; i--) { 2816 for (i = rcu_num_lvls - 1; i >= 0; i--) {
2817 ccur = rsp->levelcnt[i]; 2817 ccur = rsp->levelcnt[i];
2818 rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 2818 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
2819 cprv = ccur; 2819 cprv = ccur;
2820 } 2820 }
2821 } 2821 }
2822 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ 2822 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
2823 2823
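A worked example of the balanced (non-exact) computation above, using hypothetical numbers: assume nr_cpu_ids = 96 and a two-level tree with ->levelcnt[] = {1, 6} (the geometry derived by rcu_init_geometry() further below for rcu_fanout_leaf = 16 and CONFIG_RCU_FANOUT = 64). Starting from cprv = 96, the leaf level gets levelspread[1] = (96 + 6 - 1) / 6 = 16 and the root level gets levelspread[0] = (6 + 1 - 1) / 1 = 6, so the root fans out to six leaf rcu_node structures, each covering at most 16 CPUs. (The CONFIG_RCU_FANOUT_EXACT variant skips the division and plugs in the configured constants directly.)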
2824 /* 2824 /*
2825 * Helper function for rcu_init() that initializes one rcu_state structure. 2825 * Helper function for rcu_init() that initializes one rcu_state structure.
2826 */ 2826 */
2827 static void __init rcu_init_one(struct rcu_state *rsp, 2827 static void __init rcu_init_one(struct rcu_state *rsp,
2828 struct rcu_data __percpu *rda) 2828 struct rcu_data __percpu *rda)
2829 { 2829 {
2830 static char *buf[] = { "rcu_node_0", 2830 static char *buf[] = { "rcu_node_0",
2831 "rcu_node_1", 2831 "rcu_node_1",
2832 "rcu_node_2", 2832 "rcu_node_2",
2833 "rcu_node_3" }; /* Match MAX_RCU_LVLS */ 2833 "rcu_node_3" }; /* Match MAX_RCU_LVLS */
2834 static char *fqs[] = { "rcu_node_fqs_0", 2834 static char *fqs[] = { "rcu_node_fqs_0",
2835 "rcu_node_fqs_1", 2835 "rcu_node_fqs_1",
2836 "rcu_node_fqs_2", 2836 "rcu_node_fqs_2",
2837 "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ 2837 "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
2838 int cpustride = 1; 2838 int cpustride = 1;
2839 int i; 2839 int i;
2840 int j; 2840 int j;
2841 struct rcu_node *rnp; 2841 struct rcu_node *rnp;
2842 2842
2843 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 2843 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2844 2844
2845 /* Initialize the level-tracking arrays. */ 2845 /* Initialize the level-tracking arrays. */
2846 2846
2847 for (i = 0; i < rcu_num_lvls; i++) 2847 for (i = 0; i < rcu_num_lvls; i++)
2848 rsp->levelcnt[i] = num_rcu_lvl[i]; 2848 rsp->levelcnt[i] = num_rcu_lvl[i];
2849 for (i = 1; i < rcu_num_lvls; i++) 2849 for (i = 1; i < rcu_num_lvls; i++)
2850 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; 2850 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
2851 rcu_init_levelspread(rsp); 2851 rcu_init_levelspread(rsp);
2852 2852
2853 /* Initialize the elements themselves, starting from the leaves. */ 2853 /* Initialize the elements themselves, starting from the leaves. */
2854 2854
2855 for (i = rcu_num_lvls - 1; i >= 0; i--) { 2855 for (i = rcu_num_lvls - 1; i >= 0; i--) {
2856 cpustride *= rsp->levelspread[i]; 2856 cpustride *= rsp->levelspread[i];
2857 rnp = rsp->level[i]; 2857 rnp = rsp->level[i];
2858 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 2858 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
2859 raw_spin_lock_init(&rnp->lock); 2859 raw_spin_lock_init(&rnp->lock);
2860 lockdep_set_class_and_name(&rnp->lock, 2860 lockdep_set_class_and_name(&rnp->lock,
2861 &rcu_node_class[i], buf[i]); 2861 &rcu_node_class[i], buf[i]);
2862 raw_spin_lock_init(&rnp->fqslock); 2862 raw_spin_lock_init(&rnp->fqslock);
2863 lockdep_set_class_and_name(&rnp->fqslock, 2863 lockdep_set_class_and_name(&rnp->fqslock,
2864 &rcu_fqs_class[i], fqs[i]); 2864 &rcu_fqs_class[i], fqs[i]);
2865 rnp->gpnum = rsp->gpnum; 2865 rnp->gpnum = rsp->gpnum;
2866 rnp->completed = rsp->completed; 2866 rnp->completed = rsp->completed;
2867 rnp->qsmask = 0; 2867 rnp->qsmask = 0;
2868 rnp->qsmaskinit = 0; 2868 rnp->qsmaskinit = 0;
2869 rnp->grplo = j * cpustride; 2869 rnp->grplo = j * cpustride;
2870 rnp->grphi = (j + 1) * cpustride - 1; 2870 rnp->grphi = (j + 1) * cpustride - 1;
2871 if (rnp->grphi >= NR_CPUS) 2871 if (rnp->grphi >= NR_CPUS)
2872 rnp->grphi = NR_CPUS - 1; 2872 rnp->grphi = NR_CPUS - 1;
2873 if (i == 0) { 2873 if (i == 0) {
2874 rnp->grpnum = 0; 2874 rnp->grpnum = 0;
2875 rnp->grpmask = 0; 2875 rnp->grpmask = 0;
2876 rnp->parent = NULL; 2876 rnp->parent = NULL;
2877 } else { 2877 } else {
2878 rnp->grpnum = j % rsp->levelspread[i - 1]; 2878 rnp->grpnum = j % rsp->levelspread[i - 1];
2879 rnp->grpmask = 1UL << rnp->grpnum; 2879 rnp->grpmask = 1UL << rnp->grpnum;
2880 rnp->parent = rsp->level[i - 1] + 2880 rnp->parent = rsp->level[i - 1] +
2881 j / rsp->levelspread[i - 1]; 2881 j / rsp->levelspread[i - 1];
2882 } 2882 }
2883 rnp->level = i; 2883 rnp->level = i;
2884 INIT_LIST_HEAD(&rnp->blkd_tasks); 2884 INIT_LIST_HEAD(&rnp->blkd_tasks);
2885 } 2885 }
2886 } 2886 }
2887 2887
2888 rsp->rda = rda; 2888 rsp->rda = rda;
2889 init_waitqueue_head(&rsp->gp_wq); 2889 init_waitqueue_head(&rsp->gp_wq);
2890 rnp = rsp->level[rcu_num_lvls - 1]; 2890 rnp = rsp->level[rcu_num_lvls - 1];
2891 for_each_possible_cpu(i) { 2891 for_each_possible_cpu(i) {
2892 while (i > rnp->grphi) 2892 while (i > rnp->grphi)
2893 rnp++; 2893 rnp++;
2894 per_cpu_ptr(rsp->rda, i)->mynode = rnp; 2894 per_cpu_ptr(rsp->rda, i)->mynode = rnp;
2895 rcu_boot_init_percpu_data(i, rsp); 2895 rcu_boot_init_percpu_data(i, rsp);
2896 } 2896 }
2897 list_add(&rsp->flavors, &rcu_struct_flavors); 2897 list_add(&rsp->flavors, &rcu_struct_flavors);
2898 } 2898 }
2899 2899
2900 /* 2900 /*
2901 * Compute the rcu_node tree geometry from kernel parameters. This cannot 2901 * Compute the rcu_node tree geometry from kernel parameters. This cannot
2902 * replace the definitions in rcutree.h because those are needed to size 2902 * replace the definitions in rcutree.h because those are needed to size
2903 * the ->node array in the rcu_state structure. 2903 * the ->node array in the rcu_state structure.
2904 */ 2904 */
2905 static void __init rcu_init_geometry(void) 2905 static void __init rcu_init_geometry(void)
2906 { 2906 {
2907 int i; 2907 int i;
2908 int j; 2908 int j;
2909 int n = nr_cpu_ids; 2909 int n = nr_cpu_ids;
2910 int rcu_capacity[MAX_RCU_LVLS + 1]; 2910 int rcu_capacity[MAX_RCU_LVLS + 1];
2911 2911
2912 /* If the compile-time values are accurate, just leave. */ 2912 /* If the compile-time values are accurate, just leave. */
2913 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && 2913 if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
2914 nr_cpu_ids == NR_CPUS) 2914 nr_cpu_ids == NR_CPUS)
2915 return; 2915 return;
2916 2916
2917 /* 2917 /*
2918 * Compute the number of nodes that can be handled by an rcu_node tree 2918 * Compute the number of nodes that can be handled by an rcu_node tree
2919 * with the given number of levels. Setting rcu_capacity[0] makes 2919 * with the given number of levels. Setting rcu_capacity[0] makes
2920 * some of the arithmetic easier. 2920 * some of the arithmetic easier.
2921 */ 2921 */
2922 rcu_capacity[0] = 1; 2922 rcu_capacity[0] = 1;
2923 rcu_capacity[1] = rcu_fanout_leaf; 2923 rcu_capacity[1] = rcu_fanout_leaf;
2924 for (i = 2; i <= MAX_RCU_LVLS; i++) 2924 for (i = 2; i <= MAX_RCU_LVLS; i++)
2925 rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; 2925 rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;
2926 2926
2927 /* 2927 /*
2928 * The boot-time rcu_fanout_leaf parameter is only permitted 2928 * The boot-time rcu_fanout_leaf parameter is only permitted
2929 * to increase the leaf-level fanout, not decrease it. Of course, 2929 * to increase the leaf-level fanout, not decrease it. Of course,
2930 * the leaf-level fanout cannot exceed the number of bits in 2930 * the leaf-level fanout cannot exceed the number of bits in
2931 * the rcu_node masks. Finally, the tree must be able to accommodate 2931 * the rcu_node masks. Finally, the tree must be able to accommodate
2932 * the configured number of CPUs. Complain and fall back to the 2932 * the configured number of CPUs. Complain and fall back to the
2933 * compile-time values if these limits are exceeded. 2933 * compile-time values if these limits are exceeded.
2934 */ 2934 */
2935 if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || 2935 if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
2936 rcu_fanout_leaf > sizeof(unsigned long) * 8 || 2936 rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
2937 n > rcu_capacity[MAX_RCU_LVLS]) { 2937 n > rcu_capacity[MAX_RCU_LVLS]) {
2938 WARN_ON(1); 2938 WARN_ON(1);
2939 return; 2939 return;
2940 } 2940 }
2941 2941
2942 /* Calculate the number of rcu_nodes at each level of the tree. */ 2942 /* Calculate the number of rcu_nodes at each level of the tree. */
2943 for (i = 1; i <= MAX_RCU_LVLS; i++) 2943 for (i = 1; i <= MAX_RCU_LVLS; i++)
2944 if (n <= rcu_capacity[i]) { 2944 if (n <= rcu_capacity[i]) {
2945 for (j = 0; j <= i; j++) 2945 for (j = 0; j <= i; j++)
2946 num_rcu_lvl[j] = 2946 num_rcu_lvl[j] =
2947 DIV_ROUND_UP(n, rcu_capacity[i - j]); 2947 DIV_ROUND_UP(n, rcu_capacity[i - j]);
2948 rcu_num_lvls = i; 2948 rcu_num_lvls = i;
2949 for (j = i + 1; j <= MAX_RCU_LVLS; j++) 2949 for (j = i + 1; j <= MAX_RCU_LVLS; j++)
2950 num_rcu_lvl[j] = 0; 2950 num_rcu_lvl[j] = 0;
2951 break; 2951 break;
2952 } 2952 }
2953 2953
2954 /* Calculate the total number of rcu_node structures. */ 2954 /* Calculate the total number of rcu_node structures. */
2955 rcu_num_nodes = 0; 2955 rcu_num_nodes = 0;
2956 for (i = 0; i <= MAX_RCU_LVLS; i++) 2956 for (i = 0; i <= MAX_RCU_LVLS; i++)
2957 rcu_num_nodes += num_rcu_lvl[i]; 2957 rcu_num_nodes += num_rcu_lvl[i];
2958 rcu_num_nodes -= n; 2958 rcu_num_nodes -= n;
2959 } 2959 }
2960 2960
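To illustrate rcu_init_geometry() with hypothetical numbers: take CONFIG_RCU_FANOUT = 64, CONFIG_RCU_FANOUT_LEAF = 16 and NR_CPUS = 4096, but only nr_cpu_ids = 96 CPUs actually present, so the early exit is not taken. Then rcu_capacity[] = {1, 16, 1024, 65536, 4194304}, and the smallest tree that covers 96 CPUs has i = 2 levels (96 <= 1024). The loop therefore sets rcu_num_lvls = 2 and num_rcu_lvl[] = {DIV_ROUND_UP(96, 1024), DIV_ROUND_UP(96, 16), DIV_ROUND_UP(96, 1)} = {1, 6, 96}, giving rcu_num_nodes = (1 + 6 + 96) - 96 = 7: one root rcu_node with six leaf rcu_node structures beneath it, each leaf responsible for at most 16 CPUs.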
2961 void __init rcu_init(void) 2961 void __init rcu_init(void)
2962 { 2962 {
2963 int cpu; 2963 int cpu;
2964 2964
2965 rcu_bootup_announce(); 2965 rcu_bootup_announce();
2966 rcu_init_geometry(); 2966 rcu_init_geometry();
2967 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2967 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2968 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2968 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2969 __rcu_init_preempt(); 2969 __rcu_init_preempt();
2970 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 2970 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
2971 2971
2972 /* 2972 /*
2973 * We don't need protection against CPU-hotplug here because 2973 * We don't need protection against CPU-hotplug here because
2974 * this is called early in boot, before either interrupts 2974 * this is called early in boot, before either interrupts
2975 * or the scheduler are operational. 2975 * or the scheduler are operational.
2976 */ 2976 */
2977 cpu_notifier(rcu_cpu_notify, 0); 2977 cpu_notifier(rcu_cpu_notify, 0);
2978 for_each_online_cpu(cpu) 2978 for_each_online_cpu(cpu)
2979 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 2979 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
2980 check_cpu_stall_init(); 2980 check_cpu_stall_init();
2981 } 2981 }
2982 2982
2983 #include "rcutree_plugin.h" 2983 #include "rcutree_plugin.h"
2984 2984
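kernel/rcutree.h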
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions. 3 * Internal non-public definitions.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details. 13 * GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * 18 *
19 * Copyright IBM Corporation, 2008 19 * Copyright IBM Corporation, 2008
20 * 20 *
21 * Author: Ingo Molnar <mingo@elte.hu> 21 * Author: Ingo Molnar <mingo@elte.hu>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25 #include <linux/cache.h> 25 #include <linux/cache.h>
26 #include <linux/spinlock.h> 26 #include <linux/spinlock.h>
27 #include <linux/threads.h> 27 #include <linux/threads.h>
28 #include <linux/cpumask.h> 28 #include <linux/cpumask.h>
29 #include <linux/seqlock.h> 29 #include <linux/seqlock.h>
30 30
31 /* 31 /*
32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and 32 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
33 * CONFIG_RCU_FANOUT_LEAF. 33 * CONFIG_RCU_FANOUT_LEAF.
34 * In theory, it should be possible to add more levels straightforwardly. 34 * In theory, it should be possible to add more levels straightforwardly.
35 * In practice, this did work well going from three levels to four. 35 * In practice, this did work well going from three levels to four.
36 * Of course, your mileage may vary. 36 * Of course, your mileage may vary.
37 */ 37 */
38 #define MAX_RCU_LVLS 4 38 #define MAX_RCU_LVLS 4
39 #define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) 39 #define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)
40 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 40 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
41 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 41 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
42 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 42 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
43 43
44 #if NR_CPUS <= RCU_FANOUT_1 44 #if NR_CPUS <= RCU_FANOUT_1
45 # define RCU_NUM_LVLS 1 45 # define RCU_NUM_LVLS 1
46 # define NUM_RCU_LVL_0 1 46 # define NUM_RCU_LVL_0 1
47 # define NUM_RCU_LVL_1 (NR_CPUS) 47 # define NUM_RCU_LVL_1 (NR_CPUS)
48 # define NUM_RCU_LVL_2 0 48 # define NUM_RCU_LVL_2 0
49 # define NUM_RCU_LVL_3 0 49 # define NUM_RCU_LVL_3 0
50 # define NUM_RCU_LVL_4 0 50 # define NUM_RCU_LVL_4 0
51 #elif NR_CPUS <= RCU_FANOUT_2 51 #elif NR_CPUS <= RCU_FANOUT_2
52 # define RCU_NUM_LVLS 2 52 # define RCU_NUM_LVLS 2
53 # define NUM_RCU_LVL_0 1 53 # define NUM_RCU_LVL_0 1
54 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 54 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
55 # define NUM_RCU_LVL_2 (NR_CPUS) 55 # define NUM_RCU_LVL_2 (NR_CPUS)
56 # define NUM_RCU_LVL_3 0 56 # define NUM_RCU_LVL_3 0
57 # define NUM_RCU_LVL_4 0 57 # define NUM_RCU_LVL_4 0
58 #elif NR_CPUS <= RCU_FANOUT_3 58 #elif NR_CPUS <= RCU_FANOUT_3
59 # define RCU_NUM_LVLS 3 59 # define RCU_NUM_LVLS 3
60 # define NUM_RCU_LVL_0 1 60 # define NUM_RCU_LVL_0 1
61 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 61 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
62 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 62 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
63 # define NUM_RCU_LVL_3 (NR_CPUS) 63 # define NUM_RCU_LVL_3 (NR_CPUS)
64 # define NUM_RCU_LVL_4 0 64 # define NUM_RCU_LVL_4 0
65 #elif NR_CPUS <= RCU_FANOUT_4 65 #elif NR_CPUS <= RCU_FANOUT_4
66 # define RCU_NUM_LVLS 4 66 # define RCU_NUM_LVLS 4
67 # define NUM_RCU_LVL_0 1 67 # define NUM_RCU_LVL_0 1
68 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) 68 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
69 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 69 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
70 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 70 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
71 # define NUM_RCU_LVL_4 (NR_CPUS) 71 # define NUM_RCU_LVL_4 (NR_CPUS)
72 #else 72 #else
73 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" 73 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
74 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ 74 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
75 75
76 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) 76 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
77 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 77 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
78 78
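For example, with (say) CONFIG_RCU_FANOUT = 64 and CONFIG_RCU_FANOUT_LEAF = 16, the fanout constants become RCU_FANOUT_1..4 = 16, 1024, 65536 and 4194304. An NR_CPUS = 4096 build then lands in the NR_CPUS <= RCU_FANOUT_3 case: RCU_NUM_LVLS = 3, NUM_RCU_LVL_0 = 1, NUM_RCU_LVL_1 = DIV_ROUND_UP(4096, 1024) = 4, NUM_RCU_LVL_2 = DIV_ROUND_UP(4096, 16) = 256, NUM_RCU_LVL_3 = 4096 and NUM_RCU_LVL_4 = 0, so NUM_RCU_NODES = (1 + 4 + 256 + 4096) - 4096 = 261 rcu_node structures.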
79 extern int rcu_num_lvls; 79 extern int rcu_num_lvls;
80 extern int rcu_num_nodes; 80 extern int rcu_num_nodes;
81 81
82 /* 82 /*
83 * Dynticks per-CPU state. 83 * Dynticks per-CPU state.
84 */ 84 */
85 struct rcu_dynticks { 85 struct rcu_dynticks {
86 long long dynticks_nesting; /* Track irq/process nesting level. */ 86 long long dynticks_nesting; /* Track irq/process nesting level. */
87 /* Process level is worth LLONG_MAX/2. */ 87 /* Process level is worth LLONG_MAX/2. */
88 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 88 int dynticks_nmi_nesting; /* Track NMI nesting level. */
89 atomic_t dynticks; /* Even value for idle, else odd. */ 89 atomic_t dynticks; /* Even value for idle, else odd. */
90 #ifdef CONFIG_RCU_FAST_NO_HZ 90 #ifdef CONFIG_RCU_FAST_NO_HZ
91 int dyntick_drain; /* Prepare-for-idle state variable. */ 91 int dyntick_drain; /* Prepare-for-idle state variable. */
92 unsigned long dyntick_holdoff; 92 unsigned long dyntick_holdoff;
93 /* No retries for the jiffy of failure. */ 93 /* No retries for the jiffy of failure. */
94 struct timer_list idle_gp_timer; 94 struct timer_list idle_gp_timer;
95 /* Wake up CPU sleeping with callbacks. */ 95 /* Wake up CPU sleeping with callbacks. */
96 unsigned long idle_gp_timer_expires; 96 unsigned long idle_gp_timer_expires;
97 /* When to wake up CPU (for repost). */ 97 /* When to wake up CPU (for repost). */
98 bool idle_first_pass; /* First pass of attempt to go idle? */ 98 bool idle_first_pass; /* First pass of attempt to go idle? */
99 unsigned long nonlazy_posted; 99 unsigned long nonlazy_posted;
100 /* # times non-lazy CBs posted to CPU. */ 100 /* # times non-lazy CBs posted to CPU. */
101 unsigned long nonlazy_posted_snap; 101 unsigned long nonlazy_posted_snap;
102 /* idle-period nonlazy_posted snapshot. */ 102 /* idle-period nonlazy_posted snapshot. */
103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ 103 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
104 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 104 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
105 #ifdef CONFIG_RCU_USER_QS 105 #ifdef CONFIG_RCU_USER_QS
106 bool ignore_user_qs; /* Treat userspace as extended QS or not */ 106 bool ignore_user_qs; /* Treat userspace as extended QS or not */
107 bool in_user; /* Is the CPU in userland from RCU POV? */ 107 bool in_user; /* Is the CPU in userland from RCU POV? */
108 #endif 108 #endif
109 }; 109 };
110 110
111 /* RCU's kthread states for tracing. */ 111 /* RCU's kthread states for tracing. */
112 #define RCU_KTHREAD_STOPPED 0 112 #define RCU_KTHREAD_STOPPED 0
113 #define RCU_KTHREAD_RUNNING 1 113 #define RCU_KTHREAD_RUNNING 1
114 #define RCU_KTHREAD_WAITING 2 114 #define RCU_KTHREAD_WAITING 2
115 #define RCU_KTHREAD_OFFCPU 3 115 #define RCU_KTHREAD_OFFCPU 3
116 #define RCU_KTHREAD_YIELDING 4 116 #define RCU_KTHREAD_YIELDING 4
117 #define RCU_KTHREAD_MAX 4 117 #define RCU_KTHREAD_MAX 4
118 118
119 /* 119 /*
120 * Definition for node within the RCU grace-period-detection hierarchy. 120 * Definition for node within the RCU grace-period-detection hierarchy.
121 */ 121 */
122 struct rcu_node { 122 struct rcu_node {
123 raw_spinlock_t lock; /* Root rcu_node's lock protects some */ 123 raw_spinlock_t lock; /* Root rcu_node's lock protects some */
124 /* rcu_state fields as well as following. */ 124 /* rcu_state fields as well as following. */
125 unsigned long gpnum; /* Current grace period for this node. */ 125 unsigned long gpnum; /* Current grace period for this node. */
126 /* This will either be equal to or one */ 126 /* This will either be equal to or one */
127 /* behind the root rcu_node's gpnum. */ 127 /* behind the root rcu_node's gpnum. */
128 unsigned long completed; /* Last GP completed for this node. */ 128 unsigned long completed; /* Last GP completed for this node. */
129 /* This will either be equal to or one */ 129 /* This will either be equal to or one */
130 /* behind the root rcu_node's completed. */ 130 /* behind the root rcu_node's completed. */
131 unsigned long qsmask; /* CPUs or groups that need to switch in */ 131 unsigned long qsmask; /* CPUs or groups that need to switch in */
132 /* order for current grace period to proceed.*/ 132 /* order for current grace period to proceed.*/
133 /* In leaf rcu_node, each bit corresponds to */ 133 /* In leaf rcu_node, each bit corresponds to */
134 /* an rcu_data structure, otherwise, each */ 134 /* an rcu_data structure, otherwise, each */
135 /* bit corresponds to a child rcu_node */ 135 /* bit corresponds to a child rcu_node */
136 /* structure. */ 136 /* structure. */
137 unsigned long expmask; /* Groups that have ->blkd_tasks */ 137 unsigned long expmask; /* Groups that have ->blkd_tasks */
138 /* elements that need to drain to allow the */ 138 /* elements that need to drain to allow the */
139 /* current expedited grace period to */ 139 /* current expedited grace period to */
140 /* complete (only for TREE_PREEMPT_RCU). */ 140 /* complete (only for TREE_PREEMPT_RCU). */
141 atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ 141 atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
142 /* Since this has meaning only for leaf */ 142 /* Since this has meaning only for leaf */
143 /* rcu_node structures, 32 bits suffices. */ 143 /* rcu_node structures, 32 bits suffices. */
144 unsigned long qsmaskinit; 144 unsigned long qsmaskinit;
145 /* Per-GP initial value for qsmask & expmask. */ 145 /* Per-GP initial value for qsmask & expmask. */
146 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 146 unsigned long grpmask; /* Mask to apply to parent qsmask. */
147 /* Only one bit will be set in this mask. */ 147 /* Only one bit will be set in this mask. */
148 int grplo; /* lowest-numbered CPU or group here. */ 148 int grplo; /* lowest-numbered CPU or group here. */
149 int grphi; /* highest-numbered CPU or group here. */ 149 int grphi; /* highest-numbered CPU or group here. */
150 u8 grpnum; /* CPU/group number for next level up. */ 150 u8 grpnum; /* CPU/group number for next level up. */
151 u8 level; /* root is at level 0. */ 151 u8 level; /* root is at level 0. */
152 struct rcu_node *parent; 152 struct rcu_node *parent;
153 struct list_head blkd_tasks; 153 struct list_head blkd_tasks;
154 /* Tasks blocked in RCU read-side critical */ 154 /* Tasks blocked in RCU read-side critical */
155 /* section. Tasks are placed at the head */ 155 /* section. Tasks are placed at the head */
156 /* of this list and age towards the tail. */ 156 /* of this list and age towards the tail. */
157 struct list_head *gp_tasks; 157 struct list_head *gp_tasks;
158 /* Pointer to the first task blocking the */ 158 /* Pointer to the first task blocking the */
159 /* current grace period, or NULL if there */ 159 /* current grace period, or NULL if there */
160 /* is no such task. */ 160 /* is no such task. */
161 struct list_head *exp_tasks; 161 struct list_head *exp_tasks;
162 /* Pointer to the first task blocking the */ 162 /* Pointer to the first task blocking the */
163 /* current expedited grace period, or NULL */ 163 /* current expedited grace period, or NULL */
164 /* if there is no such task. If there */ 164 /* if there is no such task. If there */
165 /* is no current expedited grace period, */ 165 /* is no current expedited grace period, */
166 /* then there cannot be any such task. */ 166 /* then there cannot be any such task. */
167 #ifdef CONFIG_RCU_BOOST 167 #ifdef CONFIG_RCU_BOOST
168 struct list_head *boost_tasks; 168 struct list_head *boost_tasks;
169 /* Pointer to first task that needs to be */ 169 /* Pointer to first task that needs to be */
170 /* priority boosted, or NULL if no priority */ 170 /* priority boosted, or NULL if no priority */
171 /* boosting is needed for this rcu_node */ 171 /* boosting is needed for this rcu_node */
172 /* structure. If there are no tasks */ 172 /* structure. If there are no tasks */
173 /* queued on this rcu_node structure that */ 173 /* queued on this rcu_node structure that */
174 /* are blocking the current grace period, */ 174 /* are blocking the current grace period, */
175 /* there can be no such task. */ 175 /* there can be no such task. */
176 unsigned long boost_time; 176 unsigned long boost_time;
177 /* When to start boosting (jiffies). */ 177 /* When to start boosting (jiffies). */
178 struct task_struct *boost_kthread_task; 178 struct task_struct *boost_kthread_task;
179 /* kthread that takes care of priority */ 179 /* kthread that takes care of priority */
180 /* boosting for this rcu_node structure. */ 180 /* boosting for this rcu_node structure. */
181 unsigned int boost_kthread_status; 181 unsigned int boost_kthread_status;
182 /* State of boost_kthread_task for tracing. */ 182 /* State of boost_kthread_task for tracing. */
183 unsigned long n_tasks_boosted; 183 unsigned long n_tasks_boosted;
184 /* Total number of tasks boosted. */ 184 /* Total number of tasks boosted. */
185 unsigned long n_exp_boosts; 185 unsigned long n_exp_boosts;
186 /* Number of tasks boosted for expedited GP. */ 186 /* Number of tasks boosted for expedited GP. */
187 unsigned long n_normal_boosts; 187 unsigned long n_normal_boosts;
188 /* Number of tasks boosted for normal GP. */ 188 /* Number of tasks boosted for normal GP. */
189 unsigned long n_balk_blkd_tasks; 189 unsigned long n_balk_blkd_tasks;
190 /* Refused to boost: no blocked tasks. */ 190 /* Refused to boost: no blocked tasks. */
191 unsigned long n_balk_exp_gp_tasks; 191 unsigned long n_balk_exp_gp_tasks;
192 /* Refused to boost: nothing blocking GP. */ 192 /* Refused to boost: nothing blocking GP. */
193 unsigned long n_balk_boost_tasks; 193 unsigned long n_balk_boost_tasks;
194 /* Refused to boost: already boosting. */ 194 /* Refused to boost: already boosting. */
195 unsigned long n_balk_notblocked; 195 unsigned long n_balk_notblocked;
196 /* Refused to boost: RCU RS CS still running. */ 196 /* Refused to boost: RCU RS CS still running. */
197 unsigned long n_balk_notyet; 197 unsigned long n_balk_notyet;
198 /* Refused to boost: not yet time. */ 198 /* Refused to boost: not yet time. */
199 unsigned long n_balk_nos; 199 unsigned long n_balk_nos;
200 /* Refused to boost: not sure why, though. */ 200 /* Refused to boost: not sure why, though. */
201 /* This can happen due to race conditions. */ 201 /* This can happen due to race conditions. */
202 #endif /* #ifdef CONFIG_RCU_BOOST */ 202 #endif /* #ifdef CONFIG_RCU_BOOST */
203 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 203 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
204 } ____cacheline_internodealigned_in_smp; 204 } ____cacheline_internodealigned_in_smp;
205 205
206 /* 206 /*
207 * Do a full breadth-first scan of the rcu_node structures for the 207 * Do a full breadth-first scan of the rcu_node structures for the
208 * specified rcu_state structure. 208 * specified rcu_state structure.
209 */ 209 */
210 #define rcu_for_each_node_breadth_first(rsp, rnp) \ 210 #define rcu_for_each_node_breadth_first(rsp, rnp) \
211 for ((rnp) = &(rsp)->node[0]; \ 211 for ((rnp) = &(rsp)->node[0]; \
212 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) 212 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
213 213
214 /* 214 /*
215 * Do a breadth-first scan of the non-leaf rcu_node structures for the 215 * Do a breadth-first scan of the non-leaf rcu_node structures for the
216 * specified rcu_state structure. Note that if there is a singleton 216 * specified rcu_state structure. Note that if there is a singleton
217 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 217 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
218 */ 218 */
219 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 219 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
220 for ((rnp) = &(rsp)->node[0]; \ 220 for ((rnp) = &(rsp)->node[0]; \
221 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) 221 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
222 222
223 /* 223 /*
224 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 224 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
225 * structure. Note that if there is a singleton rcu_node tree with but 225 * structure. Note that if there is a singleton rcu_node tree with but
226 * one rcu_node structure, this loop -will- visit the rcu_node structure. 226 * one rcu_node structure, this loop -will- visit the rcu_node structure.
227 * It is still a leaf node, even if it is also the root node. 227 * It is still a leaf node, even if it is also the root node.
228 */ 228 */
229 #define rcu_for_each_leaf_node(rsp, rnp) \ 229 #define rcu_for_each_leaf_node(rsp, rnp) \
230 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ 230 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
231 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) 231 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
232 232
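A minimal sketch of how these iterators are used, in the form of a hypothetical debugging helper that is not part of this commit (it reads ->qsmask without the rcu_node lock, which is tolerable for a rough dump):

	/* Print the quiescent-state mask of every leaf rcu_node. */
	static void rcu_dump_leaf_qsmasks(struct rcu_state *rsp)
	{
		struct rcu_node *rnp;

		/* Walk only the leaf rcu_node structures of this flavor's tree. */
		rcu_for_each_leaf_node(rsp, rnp)
			pr_info("%s: rcu_node %d-%d qsmask %#lx\n",
				rsp->name, rnp->grplo, rnp->grphi, rnp->qsmask);
	}

rcu_for_each_node_breadth_first() is used the same way when the non-leaf rcu_node structures must be visited as well, for example when propagating state from the root downward.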
233 /* Index values for nxttail array in struct rcu_data. */ 233 /* Index values for nxttail array in struct rcu_data. */
234 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 234 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
235 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ 235 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
236 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ 236 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
237 #define RCU_NEXT_TAIL 3 237 #define RCU_NEXT_TAIL 3
238 #define RCU_NEXT_SIZE 4 238 #define RCU_NEXT_SIZE 4
239 239
240 /* Per-CPU data for read-copy update. */ 240 /* Per-CPU data for read-copy update. */
241 struct rcu_data { 241 struct rcu_data {
242 /* 1) quiescent-state and grace-period handling : */ 242 /* 1) quiescent-state and grace-period handling : */
243 unsigned long completed; /* Track rsp->completed gp number */ 243 unsigned long completed; /* Track rsp->completed gp number */
244 /* in order to detect GP end. */ 244 /* in order to detect GP end. */
245 unsigned long gpnum; /* Highest gp number that this CPU */ 245 unsigned long gpnum; /* Highest gp number that this CPU */
246 /* is aware of having started. */ 246 /* is aware of having started. */
247 bool passed_quiesce; /* User-mode/idle loop etc. */ 247 bool passed_quiesce; /* User-mode/idle loop etc. */
248 bool qs_pending; /* Core waits for quiesc state. */ 248 bool qs_pending; /* Core waits for quiesc state. */
249 bool beenonline; /* CPU online at least once. */ 249 bool beenonline; /* CPU online at least once. */
250 bool preemptible; /* Preemptible RCU? */ 250 bool preemptible; /* Preemptible RCU? */
251 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 251 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
252 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 252 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
253 #ifdef CONFIG_RCU_CPU_STALL_INFO 253 #ifdef CONFIG_RCU_CPU_STALL_INFO
254 unsigned long ticks_this_gp; /* The number of scheduling-clock */ 254 unsigned long ticks_this_gp; /* The number of scheduling-clock */
255 /* ticks this CPU has handled */ 255 /* ticks this CPU has handled */
256 /* during and after the last grace */ 256 /* during and after the last grace */
257 /* period it is aware of. */ 257 /* period it is aware of. */
258 #endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 258 #endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
259 259
260 /* 2) batch handling */ 260 /* 2) batch handling */
261 /* 261 /*
262 * If nxtlist is not NULL, it is partitioned as follows. 262 * If nxtlist is not NULL, it is partitioned as follows.
263 * Any of the partitions might be empty, in which case the 263 * Any of the partitions might be empty, in which case the
264 * pointer to that partition will be equal to the pointer for 264 * pointer to that partition will be equal to the pointer for
265 * the following partition. When the list is empty, all of 265 * the following partition. When the list is empty, all of
266 * the nxttail elements point to the ->nxtlist pointer itself, 266 * the nxttail elements point to the ->nxtlist pointer itself,
267 * which in that case is NULL. 267 * which in that case is NULL.
268 * 268 *
269 * [nxtlist, *nxttail[RCU_DONE_TAIL]): 269 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
270 * Entries that batch # <= ->completed 270 * Entries that batch # <= ->completed
271 * The grace period for these entries has completed, and 271 * The grace period for these entries has completed, and
272 * the other grace-period-completed entries may be moved 272 * the other grace-period-completed entries may be moved
273 * here temporarily in rcu_process_callbacks(). 273 * here temporarily in rcu_process_callbacks().
274 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): 274 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
275 * Entries that batch # <= ->completed - 1: waiting for current GP 275 * Entries that batch # <= ->completed - 1: waiting for current GP
276 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): 276 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
277 * Entries known to have arrived before current GP ended 277 * Entries known to have arrived before current GP ended
278 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): 278 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
279 * Entries that might have arrived after current GP ended 279 * Entries that might have arrived after current GP ended
280 * Note that the value of *nxttail[RCU_NEXT_TAIL] will 280 * Note that the value of *nxttail[RCU_NEXT_TAIL] will
281 * always be NULL, as this is the end of the list. 281 * always be NULL, as this is the end of the list.
282 */ 282 */
283 struct rcu_head *nxtlist; 283 struct rcu_head *nxtlist;
284 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 284 struct rcu_head **nxttail[RCU_NEXT_SIZE];
285 long qlen_lazy; /* # of lazy queued callbacks */ 285 long qlen_lazy; /* # of lazy queued callbacks */
286 long qlen; /* # of queued callbacks, incl lazy */ 286 long qlen; /* # of queued callbacks, incl lazy */
287 long qlen_last_fqs_check; 287 long qlen_last_fqs_check;
288 /* qlen at last check for QS forcing */ 288 /* qlen at last check for QS forcing */
289 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 289 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
290 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ 290 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
291 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ 291 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
292 unsigned long n_force_qs_snap; 292 unsigned long n_force_qs_snap;
293 /* did other CPU force QS recently? */ 293 /* did other CPU force QS recently? */
294 long blimit; /* Upper limit on a processed batch */ 294 long blimit; /* Upper limit on a processed batch */
295 295
296 /* 3) dynticks interface. */ 296 /* 3) dynticks interface. */
297 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 297 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
298 int dynticks_snap; /* Per-GP tracking for dynticks. */ 298 int dynticks_snap; /* Per-GP tracking for dynticks. */
299 299
300 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 300 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
301 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 301 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
302 unsigned long offline_fqs; /* Kicked due to being offline. */ 302 unsigned long offline_fqs; /* Kicked due to being offline. */
303 303
304 /* 5) __rcu_pending() statistics. */ 304 /* 5) __rcu_pending() statistics. */
305 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ 305 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
306 unsigned long n_rp_qs_pending; 306 unsigned long n_rp_qs_pending;
307 unsigned long n_rp_report_qs; 307 unsigned long n_rp_report_qs;
308 unsigned long n_rp_cb_ready; 308 unsigned long n_rp_cb_ready;
309 unsigned long n_rp_cpu_needs_gp; 309 unsigned long n_rp_cpu_needs_gp;
310 unsigned long n_rp_gp_completed; 310 unsigned long n_rp_gp_completed;
311 unsigned long n_rp_gp_started; 311 unsigned long n_rp_gp_started;
312 unsigned long n_rp_need_nothing; 312 unsigned long n_rp_need_nothing;
313 313
314 /* 6) _rcu_barrier() and OOM callbacks. */ 314 /* 6) _rcu_barrier() and OOM callbacks. */
315 struct rcu_head barrier_head; 315 struct rcu_head barrier_head;
316 #ifdef CONFIG_RCU_FAST_NO_HZ 316 #ifdef CONFIG_RCU_FAST_NO_HZ
317 struct rcu_head oom_head; 317 struct rcu_head oom_head;
318 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 318 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
319 319
320 int cpu; 320 int cpu;
321 struct rcu_state *rsp; 321 struct rcu_state *rsp;
322 }; 322 };
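To make the segmented ->nxtlist/->nxttail[] layout described in the comment above concrete, here is a minimal sketch of appending a new callback to the RCU_NEXT segment while preserving the invariant that *nxttail[RCU_NEXT_TAIL] is always the terminating NULL. This is an illustration based on that comment, not the kernel's actual __call_rcu() path, and the helper name is hypothetical.

/* Hypothetical helper, for illustration only. */
static void rcu_enqueue_next_cb(struct rcu_data *rdp, struct rcu_head *head)
{
	head->next = NULL;			   /* New element terminates the list. */
	*rdp->nxttail[RCU_NEXT_TAIL] = head;	   /* Overwrite the trailing NULL. */
	rdp->nxttail[RCU_NEXT_TAIL] = &head->next; /* Tail now points at the new NULL. */
	rdp->qlen++;				   /* One more queued callback. */
}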
323 323
324 /* Values for fqs_state field in struct rcu_state. */ 324 /* Values for fqs_state field in struct rcu_state. */
325 #define RCU_GP_IDLE 0 /* No grace period in progress. */ 325 #define RCU_GP_IDLE 0 /* No grace period in progress. */
326 #define RCU_GP_INIT 1 /* Grace period being initialized. */ 326 #define RCU_GP_INIT 1 /* Grace period being initialized. */
327 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ 327 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
328 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 328 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */
329 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 329 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
330 330
331 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 331 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
332 332
333 #ifdef CONFIG_PROVE_RCU 333 #ifdef CONFIG_PROVE_RCU
334 #define RCU_STALL_DELAY_DELTA (5 * HZ) 334 #define RCU_STALL_DELAY_DELTA (5 * HZ)
335 #else 335 #else
336 #define RCU_STALL_DELAY_DELTA 0 336 #define RCU_STALL_DELAY_DELTA 0
337 #endif 337 #endif
338 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 338 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
339 /* to take at least one */ 339 /* to take at least one */
340 /* scheduling clock irq */ 340 /* scheduling clock irq */
341 /* before ratting on them. */ 341 /* before ratting on them. */
342 342
343 #define rcu_wait(cond) \ 343 #define rcu_wait(cond) \
344 do { \ 344 do { \
345 for (;;) { \ 345 for (;;) { \
346 set_current_state(TASK_INTERRUPTIBLE); \ 346 set_current_state(TASK_INTERRUPTIBLE); \
347 if (cond) \ 347 if (cond) \
348 break; \ 348 break; \
349 schedule(); \ 349 schedule(); \
350 } \ 350 } \
351 __set_current_state(TASK_RUNNING); \ 351 __set_current_state(TASK_RUNNING); \
352 } while (0) 352 } while (0)
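A hedged usage sketch of rcu_wait(), not part of this commit: a kthread main loop that sleeps until its condition becomes true. The thread function name is hypothetical; ACCESS_ONCE() and kthread_should_stop() (from <linux/kthread.h>) are assumed to be available, as they are elsewhere in kernels of this vintage.

/* Hypothetical kthread body, for illustration only. */
static int rcu_example_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	while (!kthread_should_stop()) {
		/* Sleep until this CPU has queued callbacks (or we must stop). */
		rcu_wait(ACCESS_ONCE(rdp->nxtlist) != NULL ||
			 kthread_should_stop());
		/* Callback processing would go here. */
	}
	return 0;
}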
353 353
354 /* 354 /*
355 * RCU global state, including node hierarchy. This hierarchy is 355 * RCU global state, including node hierarchy. This hierarchy is
356 * represented in "heap" form in a dense array. The root (first level) 356 * represented in "heap" form in a dense array. The root (first level)
357 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second 357 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
358 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), 358 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
359 * and the third level in ->node[m+1] and following (->node[m+1] referenced 359 * and the third level in ->node[m+1] and following (->node[m+1] referenced
360 * by ->level[2]). The number of levels is determined by the number of 360 * by ->level[2]). The number of levels is determined by the number of
361 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" 361 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
362 * consisting of a single rcu_node. 362 * consisting of a single rcu_node.
363 */ 363 */
364 struct rcu_state { 364 struct rcu_state {
365 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ 365 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
366 struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ 366 struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
367 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 367 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
368 u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ 368 u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
369 struct rcu_data __percpu *rda; /* pointer to percpu rcu_data. */ 369 struct rcu_data __percpu *rda; /* pointer to percpu rcu_data. */
370 void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ 370 void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
371 void (*func)(struct rcu_head *head)); 371 void (*func)(struct rcu_head *head));
372 372
373 /* The following fields are guarded by the root rcu_node's lock. */ 373 /* The following fields are guarded by the root rcu_node's lock. */
374 374
375 u8 fqs_state ____cacheline_internodealigned_in_smp; 375 u8 fqs_state ____cacheline_internodealigned_in_smp;
376 /* Force QS state. */ 376 /* Force QS state. */
377 u8 boost; /* Subject to priority boost. */ 377 u8 boost; /* Subject to priority boost. */
378 unsigned long gpnum; /* Current gp number. */ 378 unsigned long gpnum; /* Current gp number. */
379 unsigned long completed; /* # of last completed gp. */ 379 unsigned long completed; /* # of last completed gp. */
380 struct task_struct *gp_kthread; /* Task for grace periods. */ 380 struct task_struct *gp_kthread; /* Task for grace periods. */
381 wait_queue_head_t gp_wq; /* Where GP task waits. */ 381 wait_queue_head_t gp_wq; /* Where GP task waits. */
382 int gp_flags; /* Commands for GP task. */ 382 int gp_flags; /* Commands for GP task. */
383 383
384 /* End of fields guarded by root rcu_node's lock. */ 384 /* End of fields guarded by root rcu_node's lock. */
385 385
386 raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp; 386 raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
387 /* exclude on/offline and */ 387 /* Protect following fields. */
388 /* starting new GP. */
389 struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */ 388 struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */
390 /* need a grace period. */ 389 /* need a grace period. */
391 struct rcu_head **orphan_nxttail; /* Tail of above. */ 390 struct rcu_head **orphan_nxttail; /* Tail of above. */
392 struct rcu_head *orphan_donelist; /* Orphaned callbacks that */ 391 struct rcu_head *orphan_donelist; /* Orphaned callbacks that */
393 /* are ready to invoke. */ 392 /* are ready to invoke. */
394 struct rcu_head **orphan_donetail; /* Tail of above. */ 393 struct rcu_head **orphan_donetail; /* Tail of above. */
395 long qlen_lazy; /* Number of lazy callbacks. */ 394 long qlen_lazy; /* Number of lazy callbacks. */
396 long qlen; /* Total number of callbacks. */ 395 long qlen; /* Total number of callbacks. */
397 /* End of fields guarded by onofflock. */ 396 /* End of fields guarded by orphan_lock. */
398 397
399 struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ 398 struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
400 399
401 struct mutex barrier_mutex; /* Guards barrier fields. */ 400 struct mutex barrier_mutex; /* Guards barrier fields. */
402 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 401 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
403 struct completion barrier_completion; /* Wake at barrier end. */ 402 struct completion barrier_completion; /* Wake at barrier end. */
404 unsigned long n_barrier_done; /* ++ at start and end of */ 403 unsigned long n_barrier_done; /* ++ at start and end of */
405 /* _rcu_barrier(). */ 404 /* _rcu_barrier(). */
406 /* End of fields guarded by barrier_mutex. */ 405 /* End of fields guarded by barrier_mutex. */
407 406
408 unsigned long jiffies_force_qs; /* Time at which to invoke */ 407 unsigned long jiffies_force_qs; /* Time at which to invoke */
409 /* force_quiescent_state(). */ 408 /* force_quiescent_state(). */
410 unsigned long n_force_qs; /* Number of calls to */ 409 unsigned long n_force_qs; /* Number of calls to */
411 /* force_quiescent_state(). */ 410 /* force_quiescent_state(). */
412 unsigned long n_force_qs_lh; /* ~Number of calls leaving */ 411 unsigned long n_force_qs_lh; /* ~Number of calls leaving */
413 /* due to lock unavailable. */ 412 /* due to lock unavailable. */
414 unsigned long n_force_qs_ngp; /* Number of calls leaving */ 413 unsigned long n_force_qs_ngp; /* Number of calls leaving */
415 /* due to no GP active. */ 414 /* due to no GP active. */
416 unsigned long gp_start; /* Time at which GP started, */ 415 unsigned long gp_start; /* Time at which GP started, */
417 /* but in jiffies. */ 416 /* but in jiffies. */
418 unsigned long jiffies_stall; /* Time at which to check */ 417 unsigned long jiffies_stall; /* Time at which to check */
419 /* for CPU stalls. */ 418 /* for CPU stalls. */
420 unsigned long gp_max; /* Maximum GP duration in */ 419 unsigned long gp_max; /* Maximum GP duration in */
421 /* jiffies. */ 420 /* jiffies. */
422 char *name; /* Name of structure. */ 421 char *name; /* Name of structure. */
423 struct list_head flavors; /* List of RCU flavors. */ 422 struct list_head flavors; /* List of RCU flavors. */
424 }; 423 };
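Because this commit narrows ->onofflock down to ->orphan_lock, a simplified sketch of what that lock now protects may help: when a CPU dies, its remaining callbacks are spliced onto ->orphan_nxtlist under ->orphan_lock and the per-CPU list is reset to empty. This is inferred from the field comments above, not the actual offline path in rcutree.c (which also segregates ready-to-invoke callbacks onto ->orphan_donelist); the helper name is hypothetical.

/* Hypothetical helper, for illustration only. */
static void rcu_orphan_all_cbs(struct rcu_state *rsp, struct rcu_data *rdp)
{
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
	if (rdp->nxtlist != NULL) {
		/* Splice the whole per-CPU list onto ->orphan_nxtlist. */
		*rsp->orphan_nxttail = rdp->nxtlist;
		rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
		rsp->qlen += rdp->qlen;
		rsp->qlen_lazy += rdp->qlen_lazy;
		rdp->n_cbs_orphaned += rdp->qlen;

		/* Reinitialize the per-CPU list to empty. */
		rdp->nxtlist = NULL;
		for (i = 0; i < RCU_NEXT_SIZE; i++)
			rdp->nxttail[i] = &rdp->nxtlist;
		rdp->qlen = 0;
		rdp->qlen_lazy = 0;
	}
	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
}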
425 424
426 /* Values for rcu_state structure's gp_flags field. */ 425 /* Values for rcu_state structure's gp_flags field. */
427 #define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */ 426 #define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
428 #define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */ 427 #define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
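A minimal sketch of how these flags drive the grace-period kthread, inferred from the ->gp_flags/->gp_wq comments above; it is not the actual force_quiescent_state(), which applies additional locking and checks, and the helper name is hypothetical.

/* Hypothetical helper, for illustration only. */
static void rcu_request_fqs(struct rcu_state *rsp)
{
	/* The real code updates ->gp_flags under the root rcu_node's lock. */
	rsp->gp_flags |= RCU_GP_FLAG_FQS;	/* Ask for quiescent-state forcing. */
	wake_up(&rsp->gp_wq);			/* Make sure the GP kthread notices. */
}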
429 428
430 extern struct list_head rcu_struct_flavors; 429 extern struct list_head rcu_struct_flavors;
431 #define for_each_rcu_flavor(rsp) \ 430 #define for_each_rcu_flavor(rsp) \
432 list_for_each_entry((rsp), &rcu_struct_flavors, flavors) 431 list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
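And a small usage sketch of for_each_rcu_flavor(), not part of this commit, printing per-flavor grace-period progress; the helper name is hypothetical.

/* Hypothetical helper, for illustration only. */
static void rcu_show_flavor_gps(void)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		printk(KERN_INFO "%s: gpnum=%lu completed=%lu\n",
		       rsp->name, rsp->gpnum, rsp->completed);
}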
433 432
434 /* Return values for rcu_preempt_offline_tasks(). */ 433 /* Return values for rcu_preempt_offline_tasks(). */
435 434
436 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ 435 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
437 /* GP were moved to root. */ 436 /* GP were moved to root. */
438 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ 437 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
439 /* GP were moved to root. */ 438 /* GP were moved to root. */
440 439
441 /* 440 /*
442 * RCU implementation internal declarations: 441 * RCU implementation internal declarations:
443 */ 442 */
444 extern struct rcu_state rcu_sched_state; 443 extern struct rcu_state rcu_sched_state;
445 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); 444 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
446 445
447 extern struct rcu_state rcu_bh_state; 446 extern struct rcu_state rcu_bh_state;
448 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); 447 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
449 448
450 #ifdef CONFIG_TREE_PREEMPT_RCU 449 #ifdef CONFIG_TREE_PREEMPT_RCU
451 extern struct rcu_state rcu_preempt_state; 450 extern struct rcu_state rcu_preempt_state;
452 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 451 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
453 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 452 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
454 453
455 #ifdef CONFIG_RCU_BOOST 454 #ifdef CONFIG_RCU_BOOST
456 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 455 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
457 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); 456 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
458 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 457 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
459 DECLARE_PER_CPU(char, rcu_cpu_has_work); 458 DECLARE_PER_CPU(char, rcu_cpu_has_work);
460 #endif /* #ifdef CONFIG_RCU_BOOST */ 459 #endif /* #ifdef CONFIG_RCU_BOOST */
461 460
462 #ifndef RCU_TREE_NONCORE 461 #ifndef RCU_TREE_NONCORE
463 462
464 /* Forward declarations for rcutree_plugin.h */ 463 /* Forward declarations for rcutree_plugin.h */
465 static void rcu_bootup_announce(void); 464 static void rcu_bootup_announce(void);
466 long rcu_batches_completed(void); 465 long rcu_batches_completed(void);
467 static void rcu_preempt_note_context_switch(int cpu); 466 static void rcu_preempt_note_context_switch(int cpu);
468 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 467 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
469 #ifdef CONFIG_HOTPLUG_CPU 468 #ifdef CONFIG_HOTPLUG_CPU
470 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 469 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
471 unsigned long flags); 470 unsigned long flags);
472 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 471 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
473 static void rcu_print_detail_task_stall(struct rcu_state *rsp); 472 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
474 static int rcu_print_task_stall(struct rcu_node *rnp); 473 static int rcu_print_task_stall(struct rcu_node *rnp);
475 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 474 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
476 #ifdef CONFIG_HOTPLUG_CPU 475 #ifdef CONFIG_HOTPLUG_CPU
477 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 476 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
478 struct rcu_node *rnp, 477 struct rcu_node *rnp,
479 struct rcu_data *rdp); 478 struct rcu_data *rdp);
480 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 479 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
481 static void rcu_preempt_check_callbacks(int cpu); 480 static void rcu_preempt_check_callbacks(int cpu);
482 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 481 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
483 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 482 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
484 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 483 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
485 bool wake); 484 bool wake);
486 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 485 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
487 static void __init __rcu_init_preempt(void); 486 static void __init __rcu_init_preempt(void);
488 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 487 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
489 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 488 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
490 static void invoke_rcu_callbacks_kthread(void); 489 static void invoke_rcu_callbacks_kthread(void);
491 static bool rcu_is_callbacks_kthread(void); 490 static bool rcu_is_callbacks_kthread(void);
492 #ifdef CONFIG_RCU_BOOST 491 #ifdef CONFIG_RCU_BOOST
493 static void rcu_preempt_do_callbacks(void); 492 static void rcu_preempt_do_callbacks(void);
494 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 493 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
495 struct rcu_node *rnp); 494 struct rcu_node *rnp);
496 #endif /* #ifdef CONFIG_RCU_BOOST */ 495 #endif /* #ifdef CONFIG_RCU_BOOST */
497 static void __cpuinit rcu_prepare_kthreads(int cpu); 496 static void __cpuinit rcu_prepare_kthreads(int cpu);
498 static void rcu_prepare_for_idle_init(int cpu); 497 static void rcu_prepare_for_idle_init(int cpu);
499 static void rcu_cleanup_after_idle(int cpu); 498 static void rcu_cleanup_after_idle(int cpu);
500 static void rcu_prepare_for_idle(int cpu); 499 static void rcu_prepare_for_idle(int cpu);
501 static void rcu_idle_count_callbacks_posted(void); 500 static void rcu_idle_count_callbacks_posted(void);
502 static void print_cpu_stall_info_begin(void); 501 static void print_cpu_stall_info_begin(void);
503 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); 502 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
504 static void print_cpu_stall_info_end(void); 503 static void print_cpu_stall_info_end(void);
505 static void zero_cpu_stall_ticks(struct rcu_data *rdp); 504 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
506 static void increment_cpu_stall_ticks(void); 505 static void increment_cpu_stall_ticks(void);
507 506
508 #endif /* #ifndef RCU_TREE_NONCORE */ 507 #endif /* #ifndef RCU_TREE_NONCORE */
509 508
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/oom.h> 28 #include <linux/oom.h>
29 #include <linux/smpboot.h> 29 #include <linux/smpboot.h>
30 30
31 #define RCU_KTHREAD_PRIO 1 31 #define RCU_KTHREAD_PRIO 1
32 32
33 #ifdef CONFIG_RCU_BOOST 33 #ifdef CONFIG_RCU_BOOST
34 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO 34 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
35 #else 35 #else
36 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO 36 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
37 #endif 37 #endif
38 38
39 /* 39 /*
40 * Check the RCU kernel configuration parameters and print informative 40 * Check the RCU kernel configuration parameters and print informative
41 * messages about anything out of the ordinary. If you like #ifdef, you 41 * messages about anything out of the ordinary. If you like #ifdef, you
42 * will love this function. 42 * will love this function.
43 */ 43 */
44 static void __init rcu_bootup_announce_oddness(void) 44 static void __init rcu_bootup_announce_oddness(void)
45 { 45 {
46 #ifdef CONFIG_RCU_TRACE 46 #ifdef CONFIG_RCU_TRACE
47 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); 47 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
48 #endif 48 #endif
49 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 49 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
50 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 50 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
51 CONFIG_RCU_FANOUT); 51 CONFIG_RCU_FANOUT);
52 #endif 52 #endif
53 #ifdef CONFIG_RCU_FANOUT_EXACT 53 #ifdef CONFIG_RCU_FANOUT_EXACT
54 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); 54 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
55 #endif 55 #endif
56 #ifdef CONFIG_RCU_FAST_NO_HZ 56 #ifdef CONFIG_RCU_FAST_NO_HZ
57 printk(KERN_INFO 57 printk(KERN_INFO
58 "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 58 "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
59 #endif 59 #endif
60 #ifdef CONFIG_PROVE_RCU 60 #ifdef CONFIG_PROVE_RCU
61 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); 61 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
62 #endif 62 #endif
63 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 63 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
64 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 64 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
65 #endif 65 #endif
66 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 66 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
67 printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n"); 67 printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
68 #endif 68 #endif
69 #if defined(CONFIG_RCU_CPU_STALL_INFO) 69 #if defined(CONFIG_RCU_CPU_STALL_INFO)
70 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n"); 70 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
71 #endif 71 #endif
72 #if NUM_RCU_LVL_4 != 0 72 #if NUM_RCU_LVL_4 != 0
73 printk(KERN_INFO "\tFour-level hierarchy is enabled.\n"); 73 printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
74 #endif 74 #endif
75 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) 75 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
76 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 76 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
77 if (nr_cpu_ids != NR_CPUS) 77 if (nr_cpu_ids != NR_CPUS)
78 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 78 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
79 } 79 }
80 80
81 #ifdef CONFIG_TREE_PREEMPT_RCU 81 #ifdef CONFIG_TREE_PREEMPT_RCU
82 82
83 struct rcu_state rcu_preempt_state = 83 struct rcu_state rcu_preempt_state =
84 RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); 84 RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
85 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 85 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
86 static struct rcu_state *rcu_state = &rcu_preempt_state; 86 static struct rcu_state *rcu_state = &rcu_preempt_state;
87 87
88 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 88 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
89 89
90 /* 90 /*
91 * Tell them what RCU they are running. 91 * Tell them what RCU they are running.
92 */ 92 */
93 static void __init rcu_bootup_announce(void) 93 static void __init rcu_bootup_announce(void)
94 { 94 {
95 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); 95 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
96 rcu_bootup_announce_oddness(); 96 rcu_bootup_announce_oddness();
97 } 97 }
98 98
99 /* 99 /*
100 * Return the number of RCU-preempt batches processed thus far 100 * Return the number of RCU-preempt batches processed thus far
101 * for debug and statistics. 101 * for debug and statistics.
102 */ 102 */
103 long rcu_batches_completed_preempt(void) 103 long rcu_batches_completed_preempt(void)
104 { 104 {
105 return rcu_preempt_state.completed; 105 return rcu_preempt_state.completed;
106 } 106 }
107 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 107 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
108 108
109 /* 109 /*
110 * Return the number of RCU batches processed thus far for debug & stats. 110 * Return the number of RCU batches processed thus far for debug & stats.
111 */ 111 */
112 long rcu_batches_completed(void) 112 long rcu_batches_completed(void)
113 { 113 {
114 return rcu_batches_completed_preempt(); 114 return rcu_batches_completed_preempt();
115 } 115 }
116 EXPORT_SYMBOL_GPL(rcu_batches_completed); 116 EXPORT_SYMBOL_GPL(rcu_batches_completed);
117 117
118 /* 118 /*
119 * Force a quiescent state for preemptible RCU. 119 * Force a quiescent state for preemptible RCU.
120 */ 120 */
121 void rcu_force_quiescent_state(void) 121 void rcu_force_quiescent_state(void)
122 { 122 {
123 force_quiescent_state(&rcu_preempt_state); 123 force_quiescent_state(&rcu_preempt_state);
124 } 124 }
125 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 125 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
126 126
127 /* 127 /*
128 * Record a preemptible-RCU quiescent state for the specified CPU. Note 128 * Record a preemptible-RCU quiescent state for the specified CPU. Note
129 * that this just means that the task currently running on the CPU is 129 * that this just means that the task currently running on the CPU is
130 * not in a quiescent state. There might be any number of tasks blocked 130 * not in a quiescent state. There might be any number of tasks blocked
131 * while in an RCU read-side critical section. 131 * while in an RCU read-side critical section.
132 * 132 *
133 * Unlike the other rcu_*_qs() functions, callers to this function 133 * Unlike the other rcu_*_qs() functions, callers to this function
134 * must disable irqs in order to protect the assignment to 134 * must disable irqs in order to protect the assignment to
135 * ->rcu_read_unlock_special. 135 * ->rcu_read_unlock_special.
136 */ 136 */
137 static void rcu_preempt_qs(int cpu) 137 static void rcu_preempt_qs(int cpu)
138 { 138 {
139 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 139 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
140 140
141 if (rdp->passed_quiesce == 0) 141 if (rdp->passed_quiesce == 0)
142 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); 142 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
143 rdp->passed_quiesce = 1; 143 rdp->passed_quiesce = 1;
144 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 144 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
145 } 145 }
146 146
147 /* 147 /*
148 * We have entered the scheduler, and the current task might soon be 148 * We have entered the scheduler, and the current task might soon be
149 * context-switched away from. If this task is in an RCU read-side 149 * context-switched away from. If this task is in an RCU read-side
150 * critical section, we will no longer be able to rely on the CPU to 150 * critical section, we will no longer be able to rely on the CPU to
151 * record that fact, so we enqueue the task on the blkd_tasks list. 151 * record that fact, so we enqueue the task on the blkd_tasks list.
152 * The task will dequeue itself when it exits the outermost enclosing 152 * The task will dequeue itself when it exits the outermost enclosing
153 * RCU read-side critical section. Therefore, the current grace period 153 * RCU read-side critical section. Therefore, the current grace period
154 * cannot be permitted to complete until the blkd_tasks list entries 154 * cannot be permitted to complete until the blkd_tasks list entries
155 * predating the current grace period drain, in other words, until 155 * predating the current grace period drain, in other words, until
156 * rnp->gp_tasks becomes NULL. 156 * rnp->gp_tasks becomes NULL.
157 * 157 *
158 * Caller must disable preemption. 158 * Caller must disable preemption.
159 */ 159 */
160 static void rcu_preempt_note_context_switch(int cpu) 160 static void rcu_preempt_note_context_switch(int cpu)
161 { 161 {
162 struct task_struct *t = current; 162 struct task_struct *t = current;
163 unsigned long flags; 163 unsigned long flags;
164 struct rcu_data *rdp; 164 struct rcu_data *rdp;
165 struct rcu_node *rnp; 165 struct rcu_node *rnp;
166 166
167 if (t->rcu_read_lock_nesting > 0 && 167 if (t->rcu_read_lock_nesting > 0 &&
168 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 168 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
169 169
170 /* Possibly blocking in an RCU read-side critical section. */ 170 /* Possibly blocking in an RCU read-side critical section. */
171 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 171 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
172 rnp = rdp->mynode; 172 rnp = rdp->mynode;
173 raw_spin_lock_irqsave(&rnp->lock, flags); 173 raw_spin_lock_irqsave(&rnp->lock, flags);
174 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
175 t->rcu_blocked_node = rnp; 175 t->rcu_blocked_node = rnp;
176 176
177 /* 177 /*
178 * If this CPU has already checked in, then this task 178 * If this CPU has already checked in, then this task
179 * will hold up the next grace period rather than the 179 * will hold up the next grace period rather than the
180 * current grace period. Queue the task accordingly. 180 * current grace period. Queue the task accordingly.
181 * If the task is queued for the current grace period 181 * If the task is queued for the current grace period
182 * (i.e., this CPU has not yet passed through a quiescent 182 * (i.e., this CPU has not yet passed through a quiescent
183 * state for the current grace period), then as long 183 * state for the current grace period), then as long
184 * as that task remains queued, the current grace period 184 * as that task remains queued, the current grace period
185 * cannot end. Note that there is some uncertainty as 185 * cannot end. Note that there is some uncertainty as
186 * to exactly when the current grace period started. 186 * to exactly when the current grace period started.
187 * We take a conservative approach, which can result 187 * We take a conservative approach, which can result
188 * in unnecessarily waiting on tasks that started very 188 * in unnecessarily waiting on tasks that started very
189 * slightly after the current grace period began. C'est 189 * slightly after the current grace period began. C'est
190 * la vie!!! 190 * la vie!!!
191 * 191 *
192 * But first, note that the current CPU must still be 192 * But first, note that the current CPU must still be
193 * on line! 193 * on line!
194 */ 194 */
195 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 195 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
196 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 196 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
197 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 197 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
198 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 198 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
199 rnp->gp_tasks = &t->rcu_node_entry; 199 rnp->gp_tasks = &t->rcu_node_entry;
200 #ifdef CONFIG_RCU_BOOST 200 #ifdef CONFIG_RCU_BOOST
201 if (rnp->boost_tasks != NULL) 201 if (rnp->boost_tasks != NULL)
202 rnp->boost_tasks = rnp->gp_tasks; 202 rnp->boost_tasks = rnp->gp_tasks;
203 #endif /* #ifdef CONFIG_RCU_BOOST */ 203 #endif /* #ifdef CONFIG_RCU_BOOST */
204 } else { 204 } else {
205 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 205 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
206 if (rnp->qsmask & rdp->grpmask) 206 if (rnp->qsmask & rdp->grpmask)
207 rnp->gp_tasks = &t->rcu_node_entry; 207 rnp->gp_tasks = &t->rcu_node_entry;
208 } 208 }
209 trace_rcu_preempt_task(rdp->rsp->name, 209 trace_rcu_preempt_task(rdp->rsp->name,
210 t->pid, 210 t->pid,
211 (rnp->qsmask & rdp->grpmask) 211 (rnp->qsmask & rdp->grpmask)
212 ? rnp->gpnum 212 ? rnp->gpnum
213 : rnp->gpnum + 1); 213 : rnp->gpnum + 1);
214 raw_spin_unlock_irqrestore(&rnp->lock, flags); 214 raw_spin_unlock_irqrestore(&rnp->lock, flags);
215 } else if (t->rcu_read_lock_nesting < 0 && 215 } else if (t->rcu_read_lock_nesting < 0 &&
216 t->rcu_read_unlock_special) { 216 t->rcu_read_unlock_special) {
217 217
218 /* 218 /*
219 * Complete exit from RCU read-side critical section on 219 * Complete exit from RCU read-side critical section on
220 * behalf of preempted instance of __rcu_read_unlock(). 220 * behalf of preempted instance of __rcu_read_unlock().
221 */ 221 */
222 rcu_read_unlock_special(t); 222 rcu_read_unlock_special(t);
223 } 223 }
224 224
225 /* 225 /*
226 * Either we were not in an RCU read-side critical section to 226 * Either we were not in an RCU read-side critical section to
227 * begin with, or we have now recorded that critical section 227 * begin with, or we have now recorded that critical section
228 * globally. Either way, we can now note a quiescent state 228 * globally. Either way, we can now note a quiescent state
229 * for this CPU. Again, if we were in an RCU read-side critical 229 * for this CPU. Again, if we were in an RCU read-side critical
230 * section, and if that critical section was blocking the current 230 * section, and if that critical section was blocking the current
231 * grace period, then the fact that the task has been enqueued 231 * grace period, then the fact that the task has been enqueued
232 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
233 */ 233 */
234 local_irq_save(flags); 234 local_irq_save(flags);
235 rcu_preempt_qs(cpu); 235 rcu_preempt_qs(cpu);
236 local_irq_restore(flags); 236 local_irq_restore(flags);
237 } 237 }
238 238
239 /* 239 /*
240 * Check for preempted RCU readers blocking the current grace period 240 * Check for preempted RCU readers blocking the current grace period
241 * for the specified rcu_node structure. If the caller needs a reliable 241 * for the specified rcu_node structure. If the caller needs a reliable
242 * answer, it must hold the rcu_node's ->lock. 242 * answer, it must hold the rcu_node's ->lock.
243 */ 243 */
244 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 244 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
245 { 245 {
246 return rnp->gp_tasks != NULL; 246 return rnp->gp_tasks != NULL;
247 } 247 }
248 248
249 /* 249 /*
250 * Record a quiescent state for all tasks that were previously queued 250 * Record a quiescent state for all tasks that were previously queued
251 * on the specified rcu_node structure and that were blocking the current 251 * on the specified rcu_node structure and that were blocking the current
252 * RCU grace period. The caller must hold the specified rnp->lock with 252 * RCU grace period. The caller must hold the specified rnp->lock with
253 * irqs disabled, and this lock is released upon return, but irqs remain 253 * irqs disabled, and this lock is released upon return, but irqs remain
254 * disabled. 254 * disabled.
255 */ 255 */
256 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 256 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
257 __releases(rnp->lock) 257 __releases(rnp->lock)
258 { 258 {
259 unsigned long mask; 259 unsigned long mask;
260 struct rcu_node *rnp_p; 260 struct rcu_node *rnp_p;
261 261
262 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 262 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
263 raw_spin_unlock_irqrestore(&rnp->lock, flags); 263 raw_spin_unlock_irqrestore(&rnp->lock, flags);
264 return; /* Still need more quiescent states! */ 264 return; /* Still need more quiescent states! */
265 } 265 }
266 266
267 rnp_p = rnp->parent; 267 rnp_p = rnp->parent;
268 if (rnp_p == NULL) { 268 if (rnp_p == NULL) {
269 /* 269 /*
270 * Either there is only one rcu_node in the tree, 270 * Either there is only one rcu_node in the tree,
271 * or tasks were kicked up to root rcu_node due to 271 * or tasks were kicked up to root rcu_node due to
272 * CPUs going offline. 272 * CPUs going offline.
273 */ 273 */
274 rcu_report_qs_rsp(&rcu_preempt_state, flags); 274 rcu_report_qs_rsp(&rcu_preempt_state, flags);
275 return; 275 return;
276 } 276 }
277 277
278 /* Report up the rest of the hierarchy. */ 278 /* Report up the rest of the hierarchy. */
279 mask = rnp->grpmask; 279 mask = rnp->grpmask;
280 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 280 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
281 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 281 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
282 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 282 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
283 } 283 }
284 284
285 /* 285 /*
286 * Advance a ->blkd_tasks-list pointer to the next entry, instead 286 * Advance a ->blkd_tasks-list pointer to the next entry, instead
287 * returning NULL if at the end of the list. 287 * returning NULL if at the end of the list.
288 */ 288 */
289 static struct list_head *rcu_next_node_entry(struct task_struct *t, 289 static struct list_head *rcu_next_node_entry(struct task_struct *t,
290 struct rcu_node *rnp) 290 struct rcu_node *rnp)
291 { 291 {
292 struct list_head *np; 292 struct list_head *np;
293 293
294 np = t->rcu_node_entry.next; 294 np = t->rcu_node_entry.next;
295 if (np == &rnp->blkd_tasks) 295 if (np == &rnp->blkd_tasks)
296 np = NULL; 296 np = NULL;
297 return np; 297 return np;
298 } 298 }
299 299
300 /* 300 /*
301 * Handle special cases during rcu_read_unlock(), such as needing to 301 * Handle special cases during rcu_read_unlock(), such as needing to
302 * notify RCU core processing or task having blocked during the RCU 302 * notify RCU core processing or task having blocked during the RCU
303 * read-side critical section. 303 * read-side critical section.
304 */ 304 */
305 void rcu_read_unlock_special(struct task_struct *t) 305 void rcu_read_unlock_special(struct task_struct *t)
306 { 306 {
307 int empty; 307 int empty;
308 int empty_exp; 308 int empty_exp;
309 int empty_exp_now; 309 int empty_exp_now;
310 unsigned long flags; 310 unsigned long flags;
311 struct list_head *np; 311 struct list_head *np;
312 #ifdef CONFIG_RCU_BOOST 312 #ifdef CONFIG_RCU_BOOST
313 struct rt_mutex *rbmp = NULL; 313 struct rt_mutex *rbmp = NULL;
314 #endif /* #ifdef CONFIG_RCU_BOOST */ 314 #endif /* #ifdef CONFIG_RCU_BOOST */
315 struct rcu_node *rnp; 315 struct rcu_node *rnp;
316 int special; 316 int special;
317 317
318 /* NMI handlers cannot block and cannot safely manipulate state. */ 318 /* NMI handlers cannot block and cannot safely manipulate state. */
319 if (in_nmi()) 319 if (in_nmi())
320 return; 320 return;
321 321
322 local_irq_save(flags); 322 local_irq_save(flags);
323 323
324 /* 324 /*
325 * If RCU core is waiting for this CPU to exit critical section, 325 * If RCU core is waiting for this CPU to exit critical section,
326 * let it know that we have done so. 326 * let it know that we have done so.
327 */ 327 */
328 special = t->rcu_read_unlock_special; 328 special = t->rcu_read_unlock_special;
329 if (special & RCU_READ_UNLOCK_NEED_QS) { 329 if (special & RCU_READ_UNLOCK_NEED_QS) {
330 rcu_preempt_qs(smp_processor_id()); 330 rcu_preempt_qs(smp_processor_id());
331 } 331 }
332 332
333 /* Hardware IRQ handlers cannot block. */ 333 /* Hardware IRQ handlers cannot block. */
334 if (in_irq() || in_serving_softirq()) { 334 if (in_irq() || in_serving_softirq()) {
335 local_irq_restore(flags); 335 local_irq_restore(flags);
336 return; 336 return;
337 } 337 }
338 338
339 /* Clean up if blocked during RCU read-side critical section. */ 339 /* Clean up if blocked during RCU read-side critical section. */
340 if (special & RCU_READ_UNLOCK_BLOCKED) { 340 if (special & RCU_READ_UNLOCK_BLOCKED) {
341 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 341 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
342 342
343 /* 343 /*
344 * Remove this task from the list it blocked on. The 344 * Remove this task from the list it blocked on. The
345 * task can migrate while we acquire the lock, but at 345 * task can migrate while we acquire the lock, but at
346 * most one time. So at most two passes through loop. 346 * most one time. So at most two passes through loop.
347 */ 347 */
348 for (;;) { 348 for (;;) {
349 rnp = t->rcu_blocked_node; 349 rnp = t->rcu_blocked_node;
350 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 350 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
351 if (rnp == t->rcu_blocked_node) 351 if (rnp == t->rcu_blocked_node)
352 break; 352 break;
353 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 353 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
354 } 354 }
355 empty = !rcu_preempt_blocked_readers_cgp(rnp); 355 empty = !rcu_preempt_blocked_readers_cgp(rnp);
356 empty_exp = !rcu_preempted_readers_exp(rnp); 356 empty_exp = !rcu_preempted_readers_exp(rnp);
357 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 357 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
358 np = rcu_next_node_entry(t, rnp); 358 np = rcu_next_node_entry(t, rnp);
359 list_del_init(&t->rcu_node_entry); 359 list_del_init(&t->rcu_node_entry);
360 t->rcu_blocked_node = NULL; 360 t->rcu_blocked_node = NULL;
361 trace_rcu_unlock_preempted_task("rcu_preempt", 361 trace_rcu_unlock_preempted_task("rcu_preempt",
362 rnp->gpnum, t->pid); 362 rnp->gpnum, t->pid);
363 if (&t->rcu_node_entry == rnp->gp_tasks) 363 if (&t->rcu_node_entry == rnp->gp_tasks)
364 rnp->gp_tasks = np; 364 rnp->gp_tasks = np;
365 if (&t->rcu_node_entry == rnp->exp_tasks) 365 if (&t->rcu_node_entry == rnp->exp_tasks)
366 rnp->exp_tasks = np; 366 rnp->exp_tasks = np;
367 #ifdef CONFIG_RCU_BOOST 367 #ifdef CONFIG_RCU_BOOST
368 if (&t->rcu_node_entry == rnp->boost_tasks) 368 if (&t->rcu_node_entry == rnp->boost_tasks)
369 rnp->boost_tasks = np; 369 rnp->boost_tasks = np;
370 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ 370 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
371 if (t->rcu_boost_mutex) { 371 if (t->rcu_boost_mutex) {
372 rbmp = t->rcu_boost_mutex; 372 rbmp = t->rcu_boost_mutex;
373 t->rcu_boost_mutex = NULL; 373 t->rcu_boost_mutex = NULL;
374 } 374 }
375 #endif /* #ifdef CONFIG_RCU_BOOST */ 375 #endif /* #ifdef CONFIG_RCU_BOOST */
376 376
377 /* 377 /*
378 * If this was the last task on the current list, and if 378 * If this was the last task on the current list, and if
379 * we aren't waiting on any CPUs, report the quiescent state. 379 * we aren't waiting on any CPUs, report the quiescent state.
380 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 380 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
381 * so we must take a snapshot of the expedited state. 381 * so we must take a snapshot of the expedited state.
382 */ 382 */
383 empty_exp_now = !rcu_preempted_readers_exp(rnp); 383 empty_exp_now = !rcu_preempted_readers_exp(rnp);
384 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 384 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
385 trace_rcu_quiescent_state_report("preempt_rcu", 385 trace_rcu_quiescent_state_report("preempt_rcu",
386 rnp->gpnum, 386 rnp->gpnum,
387 0, rnp->qsmask, 387 0, rnp->qsmask,
388 rnp->level, 388 rnp->level,
389 rnp->grplo, 389 rnp->grplo,
390 rnp->grphi, 390 rnp->grphi,
391 !!rnp->gp_tasks); 391 !!rnp->gp_tasks);
392 rcu_report_unblock_qs_rnp(rnp, flags); 392 rcu_report_unblock_qs_rnp(rnp, flags);
393 } else { 393 } else {
394 raw_spin_unlock_irqrestore(&rnp->lock, flags); 394 raw_spin_unlock_irqrestore(&rnp->lock, flags);
395 } 395 }
396 396
397 #ifdef CONFIG_RCU_BOOST 397 #ifdef CONFIG_RCU_BOOST
398 /* Unboost if we were boosted. */ 398 /* Unboost if we were boosted. */
399 if (rbmp) 399 if (rbmp)
400 rt_mutex_unlock(rbmp); 400 rt_mutex_unlock(rbmp);
401 #endif /* #ifdef CONFIG_RCU_BOOST */ 401 #endif /* #ifdef CONFIG_RCU_BOOST */
402 402
403 /* 403 /*
404 * If this was the last task on the expedited lists, 404 * If this was the last task on the expedited lists,
405 * then we need to report up the rcu_node hierarchy. 405 * then we need to report up the rcu_node hierarchy.
406 */ 406 */
407 if (!empty_exp && empty_exp_now) 407 if (!empty_exp && empty_exp_now)
408 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); 408 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
409 } else { 409 } else {
410 local_irq_restore(flags); 410 local_irq_restore(flags);
411 } 411 }
412 } 412 }
413 413
414 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 414 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
415 415
416 /* 416 /*
417 * Dump detailed information for all tasks blocking the current RCU 417 * Dump detailed information for all tasks blocking the current RCU
418 * grace period on the specified rcu_node structure. 418 * grace period on the specified rcu_node structure.
419 */ 419 */
420 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 420 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
421 { 421 {
422 unsigned long flags; 422 unsigned long flags;
423 struct task_struct *t; 423 struct task_struct *t;
424 424
425 raw_spin_lock_irqsave(&rnp->lock, flags); 425 raw_spin_lock_irqsave(&rnp->lock, flags);
426 if (!rcu_preempt_blocked_readers_cgp(rnp)) { 426 if (!rcu_preempt_blocked_readers_cgp(rnp)) {
427 raw_spin_unlock_irqrestore(&rnp->lock, flags); 427 raw_spin_unlock_irqrestore(&rnp->lock, flags);
428 return; 428 return;
429 } 429 }
430 t = list_entry(rnp->gp_tasks, 430 t = list_entry(rnp->gp_tasks,
431 struct task_struct, rcu_node_entry); 431 struct task_struct, rcu_node_entry);
432 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 432 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
433 sched_show_task(t); 433 sched_show_task(t);
434 raw_spin_unlock_irqrestore(&rnp->lock, flags); 434 raw_spin_unlock_irqrestore(&rnp->lock, flags);
435 } 435 }
436 436
437 /* 437 /*
438 * Dump detailed information for all tasks blocking the current RCU 438 * Dump detailed information for all tasks blocking the current RCU
439 * grace period. 439 * grace period.
440 */ 440 */
441 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 441 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
442 { 442 {
443 struct rcu_node *rnp = rcu_get_root(rsp); 443 struct rcu_node *rnp = rcu_get_root(rsp);
444 444
445 rcu_print_detail_task_stall_rnp(rnp); 445 rcu_print_detail_task_stall_rnp(rnp);
446 rcu_for_each_leaf_node(rsp, rnp) 446 rcu_for_each_leaf_node(rsp, rnp)
447 rcu_print_detail_task_stall_rnp(rnp); 447 rcu_print_detail_task_stall_rnp(rnp);
448 } 448 }
449 449
450 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 450 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
451 451
452 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 452 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
453 { 453 {
454 } 454 }
455 455
456 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 456 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
457 457
458 #ifdef CONFIG_RCU_CPU_STALL_INFO 458 #ifdef CONFIG_RCU_CPU_STALL_INFO
459 459
460 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 460 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
461 { 461 {
462 printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", 462 printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
463 rnp->level, rnp->grplo, rnp->grphi); 463 rnp->level, rnp->grplo, rnp->grphi);
464 } 464 }
465 465
466 static void rcu_print_task_stall_end(void) 466 static void rcu_print_task_stall_end(void)
467 { 467 {
468 printk(KERN_CONT "\n"); 468 printk(KERN_CONT "\n");
469 } 469 }
470 470
471 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 471 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
472 472
473 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 473 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
474 { 474 {
475 } 475 }
476 476
477 static void rcu_print_task_stall_end(void) 477 static void rcu_print_task_stall_end(void)
478 { 478 {
479 } 479 }
480 480
481 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 481 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
482 482
483 /* 483 /*
484 * Scan the current list of tasks blocked within RCU read-side critical 484 * Scan the current list of tasks blocked within RCU read-side critical
485 * sections, printing out the tid of each. 485 * sections, printing out the tid of each.
486 */ 486 */
487 static int rcu_print_task_stall(struct rcu_node *rnp) 487 static int rcu_print_task_stall(struct rcu_node *rnp)
488 { 488 {
489 struct task_struct *t; 489 struct task_struct *t;
490 int ndetected = 0; 490 int ndetected = 0;
491 491
492 if (!rcu_preempt_blocked_readers_cgp(rnp)) 492 if (!rcu_preempt_blocked_readers_cgp(rnp))
493 return 0; 493 return 0;
494 rcu_print_task_stall_begin(rnp); 494 rcu_print_task_stall_begin(rnp);
495 t = list_entry(rnp->gp_tasks, 495 t = list_entry(rnp->gp_tasks,
496 struct task_struct, rcu_node_entry); 496 struct task_struct, rcu_node_entry);
497 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 497 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
498 printk(KERN_CONT " P%d", t->pid); 498 printk(KERN_CONT " P%d", t->pid);
499 ndetected++; 499 ndetected++;
500 } 500 }
501 rcu_print_task_stall_end(); 501 rcu_print_task_stall_end();
502 return ndetected; 502 return ndetected;
503 } 503 }
504 504
505 /* 505 /*
506 * Check that the list of blocked tasks for the newly completed grace 506 * Check that the list of blocked tasks for the newly completed grace
507 * period is in fact empty. It is a serious bug to complete a grace 507 * period is in fact empty. It is a serious bug to complete a grace
508 * period that still has RCU readers blocked! This function must be 508 * period that still has RCU readers blocked! This function must be
509 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 509 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
510 * must be held by the caller. 510 * must be held by the caller.
511 * 511 *
512 * Also, if there are blocked tasks on the list, they automatically 512 * Also, if there are blocked tasks on the list, they automatically
513 * block the newly created grace period, so set up ->gp_tasks accordingly. 513 * block the newly created grace period, so set up ->gp_tasks accordingly.
514 */ 514 */
515 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 515 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
516 { 516 {
517 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 517 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
518 if (!list_empty(&rnp->blkd_tasks)) 518 if (!list_empty(&rnp->blkd_tasks))
519 rnp->gp_tasks = rnp->blkd_tasks.next; 519 rnp->gp_tasks = rnp->blkd_tasks.next;
520 WARN_ON_ONCE(rnp->qsmask); 520 WARN_ON_ONCE(rnp->qsmask);
521 } 521 }
522 522
523 #ifdef CONFIG_HOTPLUG_CPU 523 #ifdef CONFIG_HOTPLUG_CPU
524 524
525 /* 525 /*
526 * Handle tasklist migration for case in which all CPUs covered by the 526 * Handle tasklist migration for case in which all CPUs covered by the
527 * specified rcu_node have gone offline. Move them up to the root 527 * specified rcu_node have gone offline. Move them up to the root
528 * rcu_node. The reason for not just moving them to the immediate 528 * rcu_node. The reason for not just moving them to the immediate
529 * parent is to remove the need for rcu_read_unlock_special() to 529 * parent is to remove the need for rcu_read_unlock_special() to
530 * make more than two attempts to acquire the target rcu_node's lock. 530 * make more than two attempts to acquire the target rcu_node's lock.
531 * Returns true if there were tasks blocking the current RCU grace 531 * Returns true if there were tasks blocking the current RCU grace
532 * period. 532 * period.
533 * 533 *
534 * Returns 1 if there was previously a task blocking the current grace 534 * Returns 1 if there was previously a task blocking the current grace
535 * period on the specified rcu_node structure. 535 * period on the specified rcu_node structure.
536 * 536 *
537 * The caller must hold rnp->lock with irqs disabled. 537 * The caller must hold rnp->lock with irqs disabled.
538 */ 538 */
539 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 539 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
540 struct rcu_node *rnp, 540 struct rcu_node *rnp,
541 struct rcu_data *rdp) 541 struct rcu_data *rdp)
542 { 542 {
543 struct list_head *lp; 543 struct list_head *lp;
544 struct list_head *lp_root; 544 struct list_head *lp_root;
545 int retval = 0; 545 int retval = 0;
546 struct rcu_node *rnp_root = rcu_get_root(rsp); 546 struct rcu_node *rnp_root = rcu_get_root(rsp);
547 struct task_struct *t; 547 struct task_struct *t;
548 548
549 if (rnp == rnp_root) { 549 if (rnp == rnp_root) {
550 WARN_ONCE(1, "Last CPU thought to be offlined?"); 550 WARN_ONCE(1, "Last CPU thought to be offlined?");
551 return 0; /* Shouldn't happen: at least one CPU online. */ 551 return 0; /* Shouldn't happen: at least one CPU online. */
552 } 552 }
553 553
554 /* If we are on an internal node, complain bitterly. */ 554 /* If we are on an internal node, complain bitterly. */
555 WARN_ON_ONCE(rnp != rdp->mynode); 555 WARN_ON_ONCE(rnp != rdp->mynode);
556 556
557 /* 557 /*
558 * Move tasks up to root rcu_node. Don't try to get fancy for 558 * Move tasks up to root rcu_node. Don't try to get fancy for
559 * this corner-case operation -- just put this node's tasks 559 * this corner-case operation -- just put this node's tasks
560 * at the head of the root node's list, and update the root node's 560 * at the head of the root node's list, and update the root node's
561 * ->gp_tasks and ->exp_tasks pointers to those of this node, 561 * ->gp_tasks and ->exp_tasks pointers to those of this node,
562 * if non-NULL. This might result in waiting for more tasks than 562 * if non-NULL. This might result in waiting for more tasks than
563 * absolutely necessary, but this is a good performance/complexity 563 * absolutely necessary, but this is a good performance/complexity
564 * tradeoff. 564 * tradeoff.
565 */ 565 */
566 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) 566 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
567 retval |= RCU_OFL_TASKS_NORM_GP; 567 retval |= RCU_OFL_TASKS_NORM_GP;
568 if (rcu_preempted_readers_exp(rnp)) 568 if (rcu_preempted_readers_exp(rnp))
569 retval |= RCU_OFL_TASKS_EXP_GP; 569 retval |= RCU_OFL_TASKS_EXP_GP;
570 lp = &rnp->blkd_tasks; 570 lp = &rnp->blkd_tasks;
571 lp_root = &rnp_root->blkd_tasks; 571 lp_root = &rnp_root->blkd_tasks;
572 while (!list_empty(lp)) { 572 while (!list_empty(lp)) {
573 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 573 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
574 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 574 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
575 list_del(&t->rcu_node_entry); 575 list_del(&t->rcu_node_entry);
576 t->rcu_blocked_node = rnp_root; 576 t->rcu_blocked_node = rnp_root;
577 list_add(&t->rcu_node_entry, lp_root); 577 list_add(&t->rcu_node_entry, lp_root);
578 if (&t->rcu_node_entry == rnp->gp_tasks) 578 if (&t->rcu_node_entry == rnp->gp_tasks)
579 rnp_root->gp_tasks = rnp->gp_tasks; 579 rnp_root->gp_tasks = rnp->gp_tasks;
580 if (&t->rcu_node_entry == rnp->exp_tasks) 580 if (&t->rcu_node_entry == rnp->exp_tasks)
581 rnp_root->exp_tasks = rnp->exp_tasks; 581 rnp_root->exp_tasks = rnp->exp_tasks;
582 #ifdef CONFIG_RCU_BOOST 582 #ifdef CONFIG_RCU_BOOST
583 if (&t->rcu_node_entry == rnp->boost_tasks) 583 if (&t->rcu_node_entry == rnp->boost_tasks)
584 rnp_root->boost_tasks = rnp->boost_tasks; 584 rnp_root->boost_tasks = rnp->boost_tasks;
585 #endif /* #ifdef CONFIG_RCU_BOOST */ 585 #endif /* #ifdef CONFIG_RCU_BOOST */
586 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 586 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
587 } 587 }
588 588
589 rnp->gp_tasks = NULL; 589 rnp->gp_tasks = NULL;
590 rnp->exp_tasks = NULL; 590 rnp->exp_tasks = NULL;
591 #ifdef CONFIG_RCU_BOOST 591 #ifdef CONFIG_RCU_BOOST
592 rnp->boost_tasks = NULL; 592 rnp->boost_tasks = NULL;
593 /* 593 /*
594 * If the root is being boosted and the leaf was not, make sure 594 * If the root is being boosted and the leaf was not, make sure
595 * that the tasks blocking the current grace period are boosted 595 * that the tasks blocking the current grace period are boosted
596 * at the root as well. 596 * at the root as well.
597 */ 597 */
598 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 598 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
599 if (rnp_root->boost_tasks != NULL && 599 if (rnp_root->boost_tasks != NULL &&
600 rnp_root->boost_tasks != rnp_root->gp_tasks && 600 rnp_root->boost_tasks != rnp_root->gp_tasks &&
601 rnp_root->boost_tasks != rnp_root->exp_tasks) 601 rnp_root->boost_tasks != rnp_root->exp_tasks)
602 rnp_root->boost_tasks = rnp_root->gp_tasks; 602 rnp_root->boost_tasks = rnp_root->gp_tasks;
603 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 603 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
604 #endif /* #ifdef CONFIG_RCU_BOOST */ 604 #endif /* #ifdef CONFIG_RCU_BOOST */
605 605
606 return retval; 606 return retval;
607 } 607 }
608 608
609 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 609 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
610 610
611 /* 611 /*
612 * Check for a quiescent state from the current CPU. When a task blocks, 612 * Check for a quiescent state from the current CPU. When a task blocks,
613 * the task is recorded in the corresponding CPU's rcu_node structure, 613 * the task is recorded in the corresponding CPU's rcu_node structure,
614 * which is checked elsewhere. 614 * which is checked elsewhere.
615 * 615 *
616 * Caller must disable hard irqs. 616 * Caller must disable hard irqs.
617 */ 617 */
618 static void rcu_preempt_check_callbacks(int cpu) 618 static void rcu_preempt_check_callbacks(int cpu)
619 { 619 {
620 struct task_struct *t = current; 620 struct task_struct *t = current;
621 621
622 if (t->rcu_read_lock_nesting == 0) { 622 if (t->rcu_read_lock_nesting == 0) {
623 rcu_preempt_qs(cpu); 623 rcu_preempt_qs(cpu);
624 return; 624 return;
625 } 625 }
626 if (t->rcu_read_lock_nesting > 0 && 626 if (t->rcu_read_lock_nesting > 0 &&
627 per_cpu(rcu_preempt_data, cpu).qs_pending) 627 per_cpu(rcu_preempt_data, cpu).qs_pending)
628 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 628 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
629 } 629 }
630 630
631 #ifdef CONFIG_RCU_BOOST 631 #ifdef CONFIG_RCU_BOOST
632 632
633 static void rcu_preempt_do_callbacks(void) 633 static void rcu_preempt_do_callbacks(void)
634 { 634 {
635 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); 635 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
636 } 636 }
637 637
638 #endif /* #ifdef CONFIG_RCU_BOOST */ 638 #endif /* #ifdef CONFIG_RCU_BOOST */
639 639
640 /* 640 /*
641 * Queue a preemptible-RCU callback for invocation after a grace period. 641 * Queue a preemptible-RCU callback for invocation after a grace period.
642 */ 642 */
643 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 643 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
644 { 644 {
645 __call_rcu(head, func, &rcu_preempt_state, 0); 645 __call_rcu(head, func, &rcu_preempt_state, 0);
646 } 646 }
647 EXPORT_SYMBOL_GPL(call_rcu); 647 EXPORT_SYMBOL_GPL(call_rcu);
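For context, the canonical call_rcu() usage pattern embeds a struct rcu_head in the protected object and reclaims the object from the callback once a grace period has elapsed. A minimal sketch; struct foo, foo_reclaim() and foo_remove() are invented names for illustration, not anything from this file:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

/* Runs after a grace period: every pre-existing reader has finished. */
static void foo_reclaim(struct rcu_head *rcu)
{
	struct foo *fp = container_of(rcu, struct foo, rcu);

	kfree(fp);
}

static void foo_remove(struct foo *fp)
{
	/* fp must already be unreachable by new readers at this point. */
	call_rcu(&fp->rcu, foo_reclaim);
}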
648 648
649 /* 649 /*
650 * Queue an RCU callback for lazy invocation after a grace period. 650 * Queue an RCU callback for lazy invocation after a grace period.
651 * This will likely be later named something like "call_rcu_lazy()", 651 * This will likely be later named something like "call_rcu_lazy()",
652 * but this change will require some way of tagging the lazy RCU 652 * but this change will require some way of tagging the lazy RCU
653 * callbacks in the list of pending callbacks. Until then, this 653 * callbacks in the list of pending callbacks. Until then, this
654 * function may only be called from __kfree_rcu(). 654 * function may only be called from __kfree_rcu().
655 */ 655 */
656 void kfree_call_rcu(struct rcu_head *head, 656 void kfree_call_rcu(struct rcu_head *head,
657 void (*func)(struct rcu_head *rcu)) 657 void (*func)(struct rcu_head *rcu))
658 { 658 {
659 __call_rcu(head, func, &rcu_preempt_state, 1); 659 __call_rcu(head, func, &rcu_preempt_state, 1);
660 } 660 }
661 EXPORT_SYMBOL_GPL(kfree_call_rcu); 661 EXPORT_SYMBOL_GPL(kfree_call_rcu);
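Callers normally reach this entry point through the kfree_rcu() macro rather than invoking kfree_call_rcu() directly. A minimal sketch, with struct foo and foo_drop() as invented names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;	/* member name is passed to kfree_rcu() */
};

static void foo_drop(struct foo *old_fp)
{
	/*
	 * Equivalent to call_rcu() with a callback that simply kfree()s
	 * the enclosing struct foo after a grace period has elapsed.
	 */
	kfree_rcu(old_fp, rcu);
}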
662 662
663 /** 663 /**
664 * synchronize_rcu - wait until a grace period has elapsed. 664 * synchronize_rcu - wait until a grace period has elapsed.
665 * 665 *
666 * Control will return to the caller some time after a full grace 666 * Control will return to the caller some time after a full grace
667 * period has elapsed, in other words after all currently executing RCU 667 * period has elapsed, in other words after all currently executing RCU
668 * read-side critical sections have completed. Note, however, that 668 * read-side critical sections have completed. Note, however, that
669 * upon return from synchronize_rcu(), the caller might well be executing 669 * upon return from synchronize_rcu(), the caller might well be executing
670 * concurrently with new RCU read-side critical sections that began while 670 * concurrently with new RCU read-side critical sections that began while
671 * synchronize_rcu() was waiting. RCU read-side critical sections are 671 * synchronize_rcu() was waiting. RCU read-side critical sections are
672 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 672 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
673 */ 673 */
674 void synchronize_rcu(void) 674 void synchronize_rcu(void)
675 { 675 {
676 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 676 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
677 !lock_is_held(&rcu_lock_map) && 677 !lock_is_held(&rcu_lock_map) &&
678 !lock_is_held(&rcu_sched_lock_map), 678 !lock_is_held(&rcu_sched_lock_map),
679 "Illegal synchronize_rcu() in RCU read-side critical section"); 679 "Illegal synchronize_rcu() in RCU read-side critical section");
680 if (!rcu_scheduler_active) 680 if (!rcu_scheduler_active)
681 return; 681 return;
682 wait_rcu_gp(call_rcu); 682 wait_rcu_gp(call_rcu);
683 } 683 }
684 EXPORT_SYMBOL_GPL(synchronize_rcu); 684 EXPORT_SYMBOL_GPL(synchronize_rcu);
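The usual update-side pattern paired with these read-side rules is: publish the new version with rcu_assign_pointer(), wait with synchronize_rcu(), then free the old version. A minimal sketch assuming an invented struct foo, a global_foo pointer, and a foo_mutex held by updaters:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
};

static struct foo __rcu *global_foo;
static DEFINE_MUTEX(foo_mutex);		/* serializes updaters */

/* Reader: delimited by rcu_read_lock()/rcu_read_unlock(), may be nested. */
static int foo_get_data(void)
{
	struct foo *fp;
	int ret;

	rcu_read_lock();
	fp = rcu_dereference(global_foo);
	ret = fp ? fp->data : -1;
	rcu_read_unlock();
	return ret;
}

/* Updater: caller holds foo_mutex. */
static void foo_update(struct foo *new_fp)
{
	struct foo *old_fp = rcu_dereference_protected(global_foo,
					lockdep_is_held(&foo_mutex));

	rcu_assign_pointer(global_foo, new_fp);
	synchronize_rcu();	/* readers that could see old_fp are done */
	kfree(old_fp);
}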
685 685
686 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 686 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
687 static unsigned long sync_rcu_preempt_exp_count; 687 static unsigned long sync_rcu_preempt_exp_count;
688 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 688 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
689 689
690 /* 690 /*
691 * Return non-zero if there are any tasks in RCU read-side critical 691 * Return non-zero if there are any tasks in RCU read-side critical
692 * sections blocking the current preemptible-RCU expedited grace period. 692 * sections blocking the current preemptible-RCU expedited grace period.
693 * If there is no preemptible-RCU expedited grace period currently in 693 * If there is no preemptible-RCU expedited grace period currently in
694 * progress, returns zero unconditionally. 694 * progress, returns zero unconditionally.
695 */ 695 */
696 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 696 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
697 { 697 {
698 return rnp->exp_tasks != NULL; 698 return rnp->exp_tasks != NULL;
699 } 699 }
700 700
701 /* 701 /*
702 * Return non-zero if there is no RCU expedited grace period in progress 702 * Return non-zero if there is no RCU expedited grace period in progress
703 * for the specified rcu_node structure, in other words, if all CPUs and 703 * for the specified rcu_node structure, in other words, if all CPUs and
704 * tasks covered by the specified rcu_node structure have done their bit 704 * tasks covered by the specified rcu_node structure have done their bit
705 * for the current expedited grace period. Works only for preemptible 705 * for the current expedited grace period. Works only for preemptible
706 * RCU -- other RCU implementations use other means. 706 * RCU -- other RCU implementations use other means.
707 * 707 *
708 * Caller must hold sync_rcu_preempt_exp_mutex. 708 * Caller must hold sync_rcu_preempt_exp_mutex.
709 */ 709 */
710 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 710 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
711 { 711 {
712 return !rcu_preempted_readers_exp(rnp) && 712 return !rcu_preempted_readers_exp(rnp) &&
713 ACCESS_ONCE(rnp->expmask) == 0; 713 ACCESS_ONCE(rnp->expmask) == 0;
714 } 714 }
715 715
716 /* 716 /*
717 * Report the exit from an RCU read-side critical section for the last task 717 * Report the exit from an RCU read-side critical section for the last task
718 * that queued itself during or before the current expedited preemptible-RCU 718 * that queued itself during or before the current expedited preemptible-RCU
719 * grace period. This event is reported either to the rcu_node structure on 719 * grace period. This event is reported either to the rcu_node structure on
720 * which the task was queued or to one of that rcu_node structure's ancestors, 720 * which the task was queued or to one of that rcu_node structure's ancestors,
721 * recursively up the tree. (Calm down, calm down, we do the recursion 721 * recursively up the tree. (Calm down, calm down, we do the recursion
722 * iteratively!) 722 * iteratively!)
723 * 723 *
724 * Most callers will set the "wake" flag, but the task initiating the 724 * Most callers will set the "wake" flag, but the task initiating the
725 * expedited grace period need not wake itself. 725 * expedited grace period need not wake itself.
726 * 726 *
727 * Caller must hold sync_rcu_preempt_exp_mutex. 727 * Caller must hold sync_rcu_preempt_exp_mutex.
728 */ 728 */
729 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 729 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
730 bool wake) 730 bool wake)
731 { 731 {
732 unsigned long flags; 732 unsigned long flags;
733 unsigned long mask; 733 unsigned long mask;
734 734
735 raw_spin_lock_irqsave(&rnp->lock, flags); 735 raw_spin_lock_irqsave(&rnp->lock, flags);
736 for (;;) { 736 for (;;) {
737 if (!sync_rcu_preempt_exp_done(rnp)) { 737 if (!sync_rcu_preempt_exp_done(rnp)) {
738 raw_spin_unlock_irqrestore(&rnp->lock, flags); 738 raw_spin_unlock_irqrestore(&rnp->lock, flags);
739 break; 739 break;
740 } 740 }
741 if (rnp->parent == NULL) { 741 if (rnp->parent == NULL) {
742 raw_spin_unlock_irqrestore(&rnp->lock, flags); 742 raw_spin_unlock_irqrestore(&rnp->lock, flags);
743 if (wake) 743 if (wake)
744 wake_up(&sync_rcu_preempt_exp_wq); 744 wake_up(&sync_rcu_preempt_exp_wq);
745 break; 745 break;
746 } 746 }
747 mask = rnp->grpmask; 747 mask = rnp->grpmask;
748 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 748 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
749 rnp = rnp->parent; 749 rnp = rnp->parent;
750 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 750 raw_spin_lock(&rnp->lock); /* irqs already disabled */
751 rnp->expmask &= ~mask; 751 rnp->expmask &= ~mask;
752 } 752 }
753 } 753 }
754 754
755 /* 755 /*
756 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 756 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
757 * grace period for the specified rcu_node structure. If there are no such 757 * grace period for the specified rcu_node structure. If there are no such
758 * tasks, report it up the rcu_node hierarchy. 758 * tasks, report it up the rcu_node hierarchy.
759 * 759 *
760 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. 760 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
761 * CPU hotplug operations.
761 */ 762 */
762 static void 763 static void
763 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 764 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
764 { 765 {
765 unsigned long flags; 766 unsigned long flags;
766 int must_wait = 0; 767 int must_wait = 0;
767 768
768 raw_spin_lock_irqsave(&rnp->lock, flags); 769 raw_spin_lock_irqsave(&rnp->lock, flags);
769 if (list_empty(&rnp->blkd_tasks)) { 770 if (list_empty(&rnp->blkd_tasks)) {
770 raw_spin_unlock_irqrestore(&rnp->lock, flags); 771 raw_spin_unlock_irqrestore(&rnp->lock, flags);
771 } else { 772 } else {
772 rnp->exp_tasks = rnp->blkd_tasks.next; 773 rnp->exp_tasks = rnp->blkd_tasks.next;
773 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 774 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
774 must_wait = 1; 775 must_wait = 1;
775 } 776 }
776 if (!must_wait) 777 if (!must_wait)
777 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ 778 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
778 } 779 }
779 780
780 /** 781 /**
781 * synchronize_rcu_expedited - Brute-force RCU grace period 782 * synchronize_rcu_expedited - Brute-force RCU grace period
782 * 783 *
783 * Wait for an RCU-preempt grace period, but expedite it. The basic 784 * Wait for an RCU-preempt grace period, but expedite it. The basic
784 * idea is to invoke synchronize_sched_expedited() to push all the tasks to 785 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
785 * the ->blkd_tasks lists and wait for this list to drain. This consumes 786 * the ->blkd_tasks lists and wait for this list to drain. This consumes
786 * significant time on all CPUs and is unfriendly to real-time workloads, 787 * significant time on all CPUs and is unfriendly to real-time workloads,
787 * so it is not recommended for any sort of common-case code. 788 * so it is not recommended for any sort of common-case code.
788 * In fact, if you are using synchronize_rcu_expedited() in a loop, 789 * In fact, if you are using synchronize_rcu_expedited() in a loop,
789 * please restructure your code to batch your updates, and then use a 790 * please restructure your code to batch your updates, and then use a
790 * single synchronize_rcu() instead. 791 * single synchronize_rcu() instead.
791 * 792 *
792 * Note that it is illegal to call this function while holding any lock 793 * Note that it is illegal to call this function while holding any lock
793 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 794 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
794 * to call this function from a CPU-hotplug notifier. Failing to observe 795 * to call this function from a CPU-hotplug notifier. Failing to observe
795 * these restrictions will result in deadlock. 796 * these restrictions will result in deadlock.
796 */ 797 */
797 void synchronize_rcu_expedited(void) 798 void synchronize_rcu_expedited(void)
798 { 799 {
799 unsigned long flags; 800 unsigned long flags;
800 struct rcu_node *rnp; 801 struct rcu_node *rnp;
801 struct rcu_state *rsp = &rcu_preempt_state; 802 struct rcu_state *rsp = &rcu_preempt_state;
802 unsigned long snap; 803 unsigned long snap;
803 int trycount = 0; 804 int trycount = 0;
804 805
805 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 806 smp_mb(); /* Caller's modifications seen first by other CPUs. */
806 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 807 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
807 smp_mb(); /* Above access cannot bleed into critical section. */ 808 smp_mb(); /* Above access cannot bleed into critical section. */
808 809
809 /* 810 /*
810 * Block CPU-hotplug operations. This means that any CPU-hotplug 811 * Block CPU-hotplug operations. This means that any CPU-hotplug
811 * operation that finds an rcu_node structure with tasks in the 812 * operation that finds an rcu_node structure with tasks in the
812 * process of being boosted will know that all tasks blocking 813 * process of being boosted will know that all tasks blocking
813 * this expedited grace period will already be in the process of 814 * this expedited grace period will already be in the process of
814 * being boosted. This simplifies the process of moving tasks 815 * being boosted. This simplifies the process of moving tasks
815 * from leaf to root rcu_node structures. 816 * from leaf to root rcu_node structures.
816 */ 817 */
817 get_online_cpus(); 818 get_online_cpus();
818 819
819 /* 820 /*
820 * Acquire lock, falling back to synchronize_rcu() if too many 821 * Acquire lock, falling back to synchronize_rcu() if too many
821 * lock-acquisition failures. Of course, if someone does the 822 * lock-acquisition failures. Of course, if someone does the
822 * expedited grace period for us, just leave. 823 * expedited grace period for us, just leave.
823 */ 824 */
824 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 825 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
825 if (ULONG_CMP_LT(snap, 826 if (ULONG_CMP_LT(snap,
826 ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 827 ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
827 put_online_cpus(); 828 put_online_cpus();
828 goto mb_ret; /* Others did our work for us. */ 829 goto mb_ret; /* Others did our work for us. */
829 } 830 }
830 if (trycount++ < 10) { 831 if (trycount++ < 10) {
831 udelay(trycount * num_online_cpus()); 832 udelay(trycount * num_online_cpus());
832 } else { 833 } else {
833 put_online_cpus(); 834 put_online_cpus();
834 synchronize_rcu(); 835 synchronize_rcu();
835 return; 836 return;
836 } 837 }
837 } 838 }
838 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 839 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
839 put_online_cpus(); 840 put_online_cpus();
840 goto unlock_mb_ret; /* Others did our work for us. */ 841 goto unlock_mb_ret; /* Others did our work for us. */
841 } 842 }
842 843
843 /* force all RCU readers onto ->blkd_tasks lists. */ 844 /* force all RCU readers onto ->blkd_tasks lists. */
844 synchronize_sched_expedited(); 845 synchronize_sched_expedited();
845 846
846 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 847 /* Initialize ->expmask for all non-leaf rcu_node structures. */
847 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 848 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
848 raw_spin_lock_irqsave(&rnp->lock, flags); 849 raw_spin_lock_irqsave(&rnp->lock, flags);
849 rnp->expmask = rnp->qsmaskinit; 850 rnp->expmask = rnp->qsmaskinit;
850 raw_spin_unlock_irqrestore(&rnp->lock, flags); 851 raw_spin_unlock_irqrestore(&rnp->lock, flags);
851 } 852 }
852 853
853 /* Snapshot current state of ->blkd_tasks lists. */ 854 /* Snapshot current state of ->blkd_tasks lists. */
854 rcu_for_each_leaf_node(rsp, rnp) 855 rcu_for_each_leaf_node(rsp, rnp)
855 sync_rcu_preempt_exp_init(rsp, rnp); 856 sync_rcu_preempt_exp_init(rsp, rnp);
856 if (NUM_RCU_NODES > 1) 857 if (NUM_RCU_NODES > 1)
857 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 858 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
858 859
859 put_online_cpus(); 860 put_online_cpus();
860 861
861 /* Wait for snapshotted ->blkd_tasks lists to drain. */ 862 /* Wait for snapshotted ->blkd_tasks lists to drain. */
862 rnp = rcu_get_root(rsp); 863 rnp = rcu_get_root(rsp);
863 wait_event(sync_rcu_preempt_exp_wq, 864 wait_event(sync_rcu_preempt_exp_wq,
864 sync_rcu_preempt_exp_done(rnp)); 865 sync_rcu_preempt_exp_done(rnp));
865 866
866 /* Clean up and exit. */ 867 /* Clean up and exit. */
867 smp_mb(); /* ensure expedited GP seen before counter increment. */ 868 smp_mb(); /* ensure expedited GP seen before counter increment. */
868 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 869 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
869 unlock_mb_ret: 870 unlock_mb_ret:
870 mutex_unlock(&sync_rcu_preempt_exp_mutex); 871 mutex_unlock(&sync_rcu_preempt_exp_mutex);
871 mb_ret: 872 mb_ret:
872 smp_mb(); /* ensure subsequent action seen after grace period. */ 873 smp_mb(); /* ensure subsequent action seen after grace period. */
873 } 874 }
874 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 875 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
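A sketch of the batching the comment above recommends: unlink every doomed element first, pay for a single grace period, then free them all. struct foo, foo_unlink() and the doomed list are assumptions for illustration, not code from this file:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct list_head list;
	int data;
};

/* Hypothetical helper that removes p from the reader-visible structure. */
extern void foo_unlink(struct foo *p);

/* "doomed" holds elements already selected for removal. */
static void foo_flush(struct list_head *doomed)
{
	struct foo *p, *q;

	list_for_each_entry(p, doomed, list)
		foo_unlink(p);		/* unreachable to new readers */
	synchronize_rcu();		/* one grace period covers them all */
	list_for_each_entry_safe(p, q, doomed, list) {
		list_del(&p->list);
		kfree(p);
	}
}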
875 876
876 /** 877 /**
877 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 878 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
878 */ 879 */
879 void rcu_barrier(void) 880 void rcu_barrier(void)
880 { 881 {
881 _rcu_barrier(&rcu_preempt_state); 882 _rcu_barrier(&rcu_preempt_state);
882 } 883 }
883 EXPORT_SYMBOL_GPL(rcu_barrier); 884 EXPORT_SYMBOL_GPL(rcu_barrier);
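The classic rcu_barrier() use case is a module-unload path: first stop posting new callbacks, then wait for every callback already posted by call_rcu() to be invoked before its code and data go away. An illustrative sketch, with foo_unregister() and foo_cachep as invented names:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

extern struct kmem_cache *foo_cachep;	/* hypothetical cache used by callbacks */
extern void foo_unregister(void);	/* hypothetical: stops new call_rcu() posts */

static void __exit foo_exit(void)
{
	foo_unregister();		/* no new callbacks after this point */
	rcu_barrier();			/* wait for all in-flight callbacks */
	kmem_cache_destroy(foo_cachep);	/* now safe: no callback can touch it */
}
module_exit(foo_exit);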
884 885
885 /* 886 /*
886 * Initialize preemptible RCU's state structures. 887 * Initialize preemptible RCU's state structures.
887 */ 888 */
888 static void __init __rcu_init_preempt(void) 889 static void __init __rcu_init_preempt(void)
889 { 890 {
890 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 891 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
891 } 892 }
892 893
893 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 894 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
894 895
895 static struct rcu_state *rcu_state = &rcu_sched_state; 896 static struct rcu_state *rcu_state = &rcu_sched_state;
896 897
897 /* 898 /*
898 * Tell them what RCU they are running. 899 * Tell them what RCU they are running.
899 */ 900 */
900 static void __init rcu_bootup_announce(void) 901 static void __init rcu_bootup_announce(void)
901 { 902 {
902 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 903 printk(KERN_INFO "Hierarchical RCU implementation.\n");
903 rcu_bootup_announce_oddness(); 904 rcu_bootup_announce_oddness();
904 } 905 }
905 906
906 /* 907 /*
907 * Return the number of RCU batches processed thus far for debug & stats. 908 * Return the number of RCU batches processed thus far for debug & stats.
908 */ 909 */
909 long rcu_batches_completed(void) 910 long rcu_batches_completed(void)
910 { 911 {
911 return rcu_batches_completed_sched(); 912 return rcu_batches_completed_sched();
912 } 913 }
913 EXPORT_SYMBOL_GPL(rcu_batches_completed); 914 EXPORT_SYMBOL_GPL(rcu_batches_completed);
914 915
915 /* 916 /*
916 * Force a quiescent state for RCU, which, because there is no preemptible 917 * Force a quiescent state for RCU, which, because there is no preemptible
917 * RCU, becomes the same as rcu-sched. 918 * RCU, becomes the same as rcu-sched.
918 */ 919 */
919 void rcu_force_quiescent_state(void) 920 void rcu_force_quiescent_state(void)
920 { 921 {
921 rcu_sched_force_quiescent_state(); 922 rcu_sched_force_quiescent_state();
922 } 923 }
923 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 924 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
924 925
925 /* 926 /*
926 * Because preemptible RCU does not exist, we never have to check for 927 * Because preemptible RCU does not exist, we never have to check for
927 * CPUs being in quiescent states. 928 * CPUs being in quiescent states.
928 */ 929 */
929 static void rcu_preempt_note_context_switch(int cpu) 930 static void rcu_preempt_note_context_switch(int cpu)
930 { 931 {
931 } 932 }
932 933
933 /* 934 /*
934 * Because preemptible RCU does not exist, there are never any preempted 935 * Because preemptible RCU does not exist, there are never any preempted
935 * RCU readers. 936 * RCU readers.
936 */ 937 */
937 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 938 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
938 { 939 {
939 return 0; 940 return 0;
940 } 941 }
941 942
942 #ifdef CONFIG_HOTPLUG_CPU 943 #ifdef CONFIG_HOTPLUG_CPU
943 944
944 /* Because preemptible RCU does not exist, no quieting of tasks. */ 945 /* Because preemptible RCU does not exist, no quieting of tasks. */
945 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 946 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
946 { 947 {
947 raw_spin_unlock_irqrestore(&rnp->lock, flags); 948 raw_spin_unlock_irqrestore(&rnp->lock, flags);
948 } 949 }
949 950
950 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 951 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
951 952
952 /* 953 /*
953 * Because preemptible RCU does not exist, we never have to check for 954 * Because preemptible RCU does not exist, we never have to check for
954 * tasks blocked within RCU read-side critical sections. 955 * tasks blocked within RCU read-side critical sections.
955 */ 956 */
956 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 957 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
957 { 958 {
958 } 959 }
959 960
960 /* 961 /*
961 * Because preemptible RCU does not exist, we never have to check for 962 * Because preemptible RCU does not exist, we never have to check for
962 * tasks blocked within RCU read-side critical sections. 963 * tasks blocked within RCU read-side critical sections.
963 */ 964 */
964 static int rcu_print_task_stall(struct rcu_node *rnp) 965 static int rcu_print_task_stall(struct rcu_node *rnp)
965 { 966 {
966 return 0; 967 return 0;
967 } 968 }
968 969
969 /* 970 /*
970 * Because there is no preemptible RCU, there can be no readers blocked, 971 * Because there is no preemptible RCU, there can be no readers blocked,
971 * so there is no need to check for blocked tasks. So check only for 972 * so there is no need to check for blocked tasks. So check only for
972 * bogus qsmask values. 973 * bogus qsmask values.
973 */ 974 */
974 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 975 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
975 { 976 {
976 WARN_ON_ONCE(rnp->qsmask); 977 WARN_ON_ONCE(rnp->qsmask);
977 } 978 }
978 979
979 #ifdef CONFIG_HOTPLUG_CPU 980 #ifdef CONFIG_HOTPLUG_CPU
980 981
981 /* 982 /*
982 * Because preemptible RCU does not exist, it never needs to migrate 983 * Because preemptible RCU does not exist, it never needs to migrate
983 * tasks that were blocked within RCU read-side critical sections, and 984 * tasks that were blocked within RCU read-side critical sections, and
984 * such non-existent tasks cannot possibly have been blocking the current 985 * such non-existent tasks cannot possibly have been blocking the current
985 * grace period. 986 * grace period.
986 */ 987 */
987 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 988 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
988 struct rcu_node *rnp, 989 struct rcu_node *rnp,
989 struct rcu_data *rdp) 990 struct rcu_data *rdp)
990 { 991 {
991 return 0; 992 return 0;
992 } 993 }
993 994
994 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 995 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
995 996
996 /* 997 /*
997 * Because preemptible RCU does not exist, it never has any callbacks 998 * Because preemptible RCU does not exist, it never has any callbacks
998 * to check. 999 * to check.
999 */ 1000 */
1000 static void rcu_preempt_check_callbacks(int cpu) 1001 static void rcu_preempt_check_callbacks(int cpu)
1001 { 1002 {
1002 } 1003 }
1003 1004
1004 /* 1005 /*
1005 * Queue an RCU callback for lazy invocation after a grace period. 1006 * Queue an RCU callback for lazy invocation after a grace period.
1006 * This will likely be later named something like "call_rcu_lazy()", 1007 * This will likely be later named something like "call_rcu_lazy()",
1007 * but this change will require some way of tagging the lazy RCU 1008 * but this change will require some way of tagging the lazy RCU
1008 * callbacks in the list of pending callbacks. Until then, this 1009 * callbacks in the list of pending callbacks. Until then, this
1009 * function may only be called from __kfree_rcu(). 1010 * function may only be called from __kfree_rcu().
1010 * 1011 *
1011 * Because there is no preemptible RCU, we use RCU-sched instead. 1012 * Because there is no preemptible RCU, we use RCU-sched instead.
1012 */ 1013 */
1013 void kfree_call_rcu(struct rcu_head *head, 1014 void kfree_call_rcu(struct rcu_head *head,
1014 void (*func)(struct rcu_head *rcu)) 1015 void (*func)(struct rcu_head *rcu))
1015 { 1016 {
1016 __call_rcu(head, func, &rcu_sched_state, 1); 1017 __call_rcu(head, func, &rcu_sched_state, 1);
1017 } 1018 }
1018 EXPORT_SYMBOL_GPL(kfree_call_rcu); 1019 EXPORT_SYMBOL_GPL(kfree_call_rcu);
1019 1020
1020 /* 1021 /*
1021 * Wait for an rcu-preempt grace period, but make it happen quickly. 1022 * Wait for an rcu-preempt grace period, but make it happen quickly.
1022 * But because preemptible RCU does not exist, map to rcu-sched. 1023 * But because preemptible RCU does not exist, map to rcu-sched.
1023 */ 1024 */
1024 void synchronize_rcu_expedited(void) 1025 void synchronize_rcu_expedited(void)
1025 { 1026 {
1026 synchronize_sched_expedited(); 1027 synchronize_sched_expedited();
1027 } 1028 }
1028 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 1029 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1029 1030
1030 #ifdef CONFIG_HOTPLUG_CPU 1031 #ifdef CONFIG_HOTPLUG_CPU
1031 1032
1032 /* 1033 /*
1033 * Because preemptible RCU does not exist, there is never any need to 1034 * Because preemptible RCU does not exist, there is never any need to
1034 * report on tasks preempted in RCU read-side critical sections during 1035 * report on tasks preempted in RCU read-side critical sections during
1035 * expedited RCU grace periods. 1036 * expedited RCU grace periods.
1036 */ 1037 */
1037 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 1038 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1038 bool wake) 1039 bool wake)
1039 { 1040 {
1040 } 1041 }
1041 1042
1042 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1043 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1043 1044
1044 /* 1045 /*
1045 * Because preemptible RCU does not exist, rcu_barrier() is just 1046 * Because preemptible RCU does not exist, rcu_barrier() is just
1046 * another name for rcu_barrier_sched(). 1047 * another name for rcu_barrier_sched().
1047 */ 1048 */
1048 void rcu_barrier(void) 1049 void rcu_barrier(void)
1049 { 1050 {
1050 rcu_barrier_sched(); 1051 rcu_barrier_sched();
1051 } 1052 }
1052 EXPORT_SYMBOL_GPL(rcu_barrier); 1053 EXPORT_SYMBOL_GPL(rcu_barrier);
1053 1054
1054 /* 1055 /*
1055 * Because preemptible RCU does not exist, it need not be initialized. 1056 * Because preemptible RCU does not exist, it need not be initialized.
1056 */ 1057 */
1057 static void __init __rcu_init_preempt(void) 1058 static void __init __rcu_init_preempt(void)
1058 { 1059 {
1059 } 1060 }
1060 1061
1061 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1062 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1062 1063
1063 #ifdef CONFIG_RCU_BOOST 1064 #ifdef CONFIG_RCU_BOOST
1064 1065
1065 #include "rtmutex_common.h" 1066 #include "rtmutex_common.h"
1066 1067
1067 #ifdef CONFIG_RCU_TRACE 1068 #ifdef CONFIG_RCU_TRACE
1068 1069
1069 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1070 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1070 { 1071 {
1071 if (list_empty(&rnp->blkd_tasks)) 1072 if (list_empty(&rnp->blkd_tasks))
1072 rnp->n_balk_blkd_tasks++; 1073 rnp->n_balk_blkd_tasks++;
1073 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 1074 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1074 rnp->n_balk_exp_gp_tasks++; 1075 rnp->n_balk_exp_gp_tasks++;
1075 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) 1076 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1076 rnp->n_balk_boost_tasks++; 1077 rnp->n_balk_boost_tasks++;
1077 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) 1078 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1078 rnp->n_balk_notblocked++; 1079 rnp->n_balk_notblocked++;
1079 else if (rnp->gp_tasks != NULL && 1080 else if (rnp->gp_tasks != NULL &&
1080 ULONG_CMP_LT(jiffies, rnp->boost_time)) 1081 ULONG_CMP_LT(jiffies, rnp->boost_time))
1081 rnp->n_balk_notyet++; 1082 rnp->n_balk_notyet++;
1082 else 1083 else
1083 rnp->n_balk_nos++; 1084 rnp->n_balk_nos++;
1084 } 1085 }
1085 1086
1086 #else /* #ifdef CONFIG_RCU_TRACE */ 1087 #else /* #ifdef CONFIG_RCU_TRACE */
1087 1088
1088 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1089 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1089 { 1090 {
1090 } 1091 }
1091 1092
1092 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1093 #endif /* #else #ifdef CONFIG_RCU_TRACE */
1093 1094
1094 static void rcu_wake_cond(struct task_struct *t, int status) 1095 static void rcu_wake_cond(struct task_struct *t, int status)
1095 { 1096 {
1096 /* 1097 /*
1097 * If the thread is yielding, only wake it when this 1098 * If the thread is yielding, only wake it when this
1098 * is invoked from idle. 1099 * is invoked from idle.
1099 */ 1100 */
1100 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) 1101 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
1101 wake_up_process(t); 1102 wake_up_process(t);
1102 } 1103 }
1103 1104
1104 /* 1105 /*
1105 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1106 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1106 * or ->boost_tasks, advancing the pointer to the next task in the 1107 * or ->boost_tasks, advancing the pointer to the next task in the
1107 * ->blkd_tasks list. 1108 * ->blkd_tasks list.
1108 * 1109 *
1109 * Note that irqs must be enabled: boosting the task can block. 1110 * Note that irqs must be enabled: boosting the task can block.
1110 * Returns 1 if there are more tasks needing to be boosted. 1111 * Returns 1 if there are more tasks needing to be boosted.
1111 */ 1112 */
1112 static int rcu_boost(struct rcu_node *rnp) 1113 static int rcu_boost(struct rcu_node *rnp)
1113 { 1114 {
1114 unsigned long flags; 1115 unsigned long flags;
1115 struct rt_mutex mtx; 1116 struct rt_mutex mtx;
1116 struct task_struct *t; 1117 struct task_struct *t;
1117 struct list_head *tb; 1118 struct list_head *tb;
1118 1119
1119 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1120 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1120 return 0; /* Nothing left to boost. */ 1121 return 0; /* Nothing left to boost. */
1121 1122
1122 raw_spin_lock_irqsave(&rnp->lock, flags); 1123 raw_spin_lock_irqsave(&rnp->lock, flags);
1123 1124
1124 /* 1125 /*
1125 * Recheck under the lock: all tasks in need of boosting 1126 * Recheck under the lock: all tasks in need of boosting
1126 * might exit their RCU read-side critical sections on their own. 1127 * might exit their RCU read-side critical sections on their own.
1127 */ 1128 */
1128 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { 1129 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1129 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1130 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1130 return 0; 1131 return 0;
1131 } 1132 }
1132 1133
1133 /* 1134 /*
1134 * Preferentially boost tasks blocking expedited grace periods. 1135 * Preferentially boost tasks blocking expedited grace periods.
1135 * This cannot starve the normal grace periods because a second 1136 * This cannot starve the normal grace periods because a second
1136 * expedited grace period must boost all blocked tasks, including 1137 * expedited grace period must boost all blocked tasks, including
1137 * those blocking the pre-existing normal grace period. 1138 * those blocking the pre-existing normal grace period.
1138 */ 1139 */
1139 if (rnp->exp_tasks != NULL) { 1140 if (rnp->exp_tasks != NULL) {
1140 tb = rnp->exp_tasks; 1141 tb = rnp->exp_tasks;
1141 rnp->n_exp_boosts++; 1142 rnp->n_exp_boosts++;
1142 } else { 1143 } else {
1143 tb = rnp->boost_tasks; 1144 tb = rnp->boost_tasks;
1144 rnp->n_normal_boosts++; 1145 rnp->n_normal_boosts++;
1145 } 1146 }
1146 rnp->n_tasks_boosted++; 1147 rnp->n_tasks_boosted++;
1147 1148
1148 /* 1149 /*
1149 * We boost task t by manufacturing an rt_mutex that appears to 1150 * We boost task t by manufacturing an rt_mutex that appears to
1150 * be held by task t. We leave a pointer to that rt_mutex where 1151 * be held by task t. We leave a pointer to that rt_mutex where
1151 * task t can find it, and task t will release the mutex when it 1152 * task t can find it, and task t will release the mutex when it
1152 * exits its outermost RCU read-side critical section. Then 1153 * exits its outermost RCU read-side critical section. Then
1153 * simply acquiring this artificial rt_mutex will boost task 1154 * simply acquiring this artificial rt_mutex will boost task
1154 * t's priority. (Thanks to tglx for suggesting this approach!) 1155 * t's priority. (Thanks to tglx for suggesting this approach!)
1155 * 1156 *
1156 * Note that task t must acquire rnp->lock to remove itself from 1157 * Note that task t must acquire rnp->lock to remove itself from
1157 * the ->blkd_tasks list, which it will do from exit() if from 1158 * the ->blkd_tasks list, which it will do from exit() if from
1158 * nowhere else. We therefore are guaranteed that task t will 1159 * nowhere else. We therefore are guaranteed that task t will
1159 * stay around at least until we drop rnp->lock. Note that 1160 * stay around at least until we drop rnp->lock. Note that
1160 * rnp->lock also resolves races between our priority boosting 1161 * rnp->lock also resolves races between our priority boosting
1161 * and task t's exiting its outermost RCU read-side critical 1162 * and task t's exiting its outermost RCU read-side critical
1162 * section. 1163 * section.
1163 */ 1164 */
1164 t = container_of(tb, struct task_struct, rcu_node_entry); 1165 t = container_of(tb, struct task_struct, rcu_node_entry);
1165 rt_mutex_init_proxy_locked(&mtx, t); 1166 rt_mutex_init_proxy_locked(&mtx, t);
1166 t->rcu_boost_mutex = &mtx; 1167 t->rcu_boost_mutex = &mtx;
1167 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1168 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1168 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1169 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1169 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1170 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1170 1171
1171 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1172 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1172 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1173 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1173 } 1174 }
1174 1175
1175 /* 1176 /*
1176 * Priority-boosting kthread. One per leaf rcu_node and one for the 1177 * Priority-boosting kthread. One per leaf rcu_node and one for the
1177 * root rcu_node. 1178 * root rcu_node.
1178 */ 1179 */
1179 static int rcu_boost_kthread(void *arg) 1180 static int rcu_boost_kthread(void *arg)
1180 { 1181 {
1181 struct rcu_node *rnp = (struct rcu_node *)arg; 1182 struct rcu_node *rnp = (struct rcu_node *)arg;
1182 int spincnt = 0; 1183 int spincnt = 0;
1183 int more2boost; 1184 int more2boost;
1184 1185
1185 trace_rcu_utilization("Start boost kthread@init"); 1186 trace_rcu_utilization("Start boost kthread@init");
1186 for (;;) { 1187 for (;;) {
1187 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1188 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1188 trace_rcu_utilization("End boost kthread@rcu_wait"); 1189 trace_rcu_utilization("End boost kthread@rcu_wait");
1189 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1190 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1190 trace_rcu_utilization("Start boost kthread@rcu_wait"); 1191 trace_rcu_utilization("Start boost kthread@rcu_wait");
1191 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1192 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1192 more2boost = rcu_boost(rnp); 1193 more2boost = rcu_boost(rnp);
1193 if (more2boost) 1194 if (more2boost)
1194 spincnt++; 1195 spincnt++;
1195 else 1196 else
1196 spincnt = 0; 1197 spincnt = 0;
1197 if (spincnt > 10) { 1198 if (spincnt > 10) {
1198 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; 1199 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
1199 trace_rcu_utilization("End boost kthread@rcu_yield"); 1200 trace_rcu_utilization("End boost kthread@rcu_yield");
1200 schedule_timeout_interruptible(2); 1201 schedule_timeout_interruptible(2);
1201 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1202 trace_rcu_utilization("Start boost kthread@rcu_yield");
1202 spincnt = 0; 1203 spincnt = 0;
1203 } 1204 }
1204 } 1205 }
1205 /* NOTREACHED */ 1206 /* NOTREACHED */
1206 trace_rcu_utilization("End boost kthread@notreached"); 1207 trace_rcu_utilization("End boost kthread@notreached");
1207 return 0; 1208 return 0;
1208 } 1209 }
1209 1210
1210 /* 1211 /*
1211 * Check to see if it is time to start boosting RCU readers that are 1212 * Check to see if it is time to start boosting RCU readers that are
1212 * blocking the current grace period, and, if so, tell the per-rcu_node 1213 * blocking the current grace period, and, if so, tell the per-rcu_node
1213 * kthread to start boosting them. If there is an expedited grace 1214 * kthread to start boosting them. If there is an expedited grace
1214 * period in progress, it is always time to boost. 1215 * period in progress, it is always time to boost.
1215 * 1216 *
1216 * The caller must hold rnp->lock, which this function releases. 1217 * The caller must hold rnp->lock, which this function releases.
1217 * The ->boost_kthread_task is immortal, so we don't need to worry 1218 * The ->boost_kthread_task is immortal, so we don't need to worry
1218 * about it going away. 1219 * about it going away.
1219 */ 1220 */
1220 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1221 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1221 { 1222 {
1222 struct task_struct *t; 1223 struct task_struct *t;
1223 1224
1224 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1225 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1225 rnp->n_balk_exp_gp_tasks++; 1226 rnp->n_balk_exp_gp_tasks++;
1226 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1227 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1227 return; 1228 return;
1228 } 1229 }
1229 if (rnp->exp_tasks != NULL || 1230 if (rnp->exp_tasks != NULL ||
1230 (rnp->gp_tasks != NULL && 1231 (rnp->gp_tasks != NULL &&
1231 rnp->boost_tasks == NULL && 1232 rnp->boost_tasks == NULL &&
1232 rnp->qsmask == 0 && 1233 rnp->qsmask == 0 &&
1233 ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1234 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1234 if (rnp->exp_tasks == NULL) 1235 if (rnp->exp_tasks == NULL)
1235 rnp->boost_tasks = rnp->gp_tasks; 1236 rnp->boost_tasks = rnp->gp_tasks;
1236 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1237 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1237 t = rnp->boost_kthread_task; 1238 t = rnp->boost_kthread_task;
1238 if (t) 1239 if (t)
1239 rcu_wake_cond(t, rnp->boost_kthread_status); 1240 rcu_wake_cond(t, rnp->boost_kthread_status);
1240 } else { 1241 } else {
1241 rcu_initiate_boost_trace(rnp); 1242 rcu_initiate_boost_trace(rnp);
1242 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1243 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1243 } 1244 }
1244 } 1245 }
1245 1246
1246 /* 1247 /*
1247 * Wake up the per-CPU kthread to invoke RCU callbacks. 1248 * Wake up the per-CPU kthread to invoke RCU callbacks.
1248 */ 1249 */
1249 static void invoke_rcu_callbacks_kthread(void) 1250 static void invoke_rcu_callbacks_kthread(void)
1250 { 1251 {
1251 unsigned long flags; 1252 unsigned long flags;
1252 1253
1253 local_irq_save(flags); 1254 local_irq_save(flags);
1254 __this_cpu_write(rcu_cpu_has_work, 1); 1255 __this_cpu_write(rcu_cpu_has_work, 1);
1255 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1256 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1256 current != __this_cpu_read(rcu_cpu_kthread_task)) { 1257 current != __this_cpu_read(rcu_cpu_kthread_task)) {
1257 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), 1258 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
1258 __this_cpu_read(rcu_cpu_kthread_status)); 1259 __this_cpu_read(rcu_cpu_kthread_status));
1259 } 1260 }
1260 local_irq_restore(flags); 1261 local_irq_restore(flags);
1261 } 1262 }
1262 1263
1263 /* 1264 /*
1264 * Is the current CPU running the RCU-callbacks kthread? 1265 * Is the current CPU running the RCU-callbacks kthread?
1265 * Caller must have preemption disabled. 1266 * Caller must have preemption disabled.
1266 */ 1267 */
1267 static bool rcu_is_callbacks_kthread(void) 1268 static bool rcu_is_callbacks_kthread(void)
1268 { 1269 {
1269 return __get_cpu_var(rcu_cpu_kthread_task) == current; 1270 return __get_cpu_var(rcu_cpu_kthread_task) == current;
1270 } 1271 }
1271 1272
1272 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1273 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1273 1274
1274 /* 1275 /*
1275 * Do priority-boost accounting for the start of a new grace period. 1276 * Do priority-boost accounting for the start of a new grace period.
1276 */ 1277 */
1277 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1278 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1278 { 1279 {
1279 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 1280 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1280 } 1281 }
1281 1282
1282 /* 1283 /*
1283 * Create an RCU-boost kthread for the specified node if one does not 1284 * Create an RCU-boost kthread for the specified node if one does not
1284 * already exist. We only create this kthread for preemptible RCU. 1285 * already exist. We only create this kthread for preemptible RCU.
1285 * Returns zero if all is well, a negated errno otherwise. 1286 * Returns zero if all is well, a negated errno otherwise.
1286 */ 1287 */
1287 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1288 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1288 struct rcu_node *rnp) 1289 struct rcu_node *rnp)
1289 { 1290 {
1290 int rnp_index = rnp - &rsp->node[0]; 1291 int rnp_index = rnp - &rsp->node[0];
1291 unsigned long flags; 1292 unsigned long flags;
1292 struct sched_param sp; 1293 struct sched_param sp;
1293 struct task_struct *t; 1294 struct task_struct *t;
1294 1295
1295 if (&rcu_preempt_state != rsp) 1296 if (&rcu_preempt_state != rsp)
1296 return 0; 1297 return 0;
1297 1298
1298 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) 1299 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
1299 return 0; 1300 return 0;
1300 1301
1301 rsp->boost = 1; 1302 rsp->boost = 1;
1302 if (rnp->boost_kthread_task != NULL) 1303 if (rnp->boost_kthread_task != NULL)
1303 return 0; 1304 return 0;
1304 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1305 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1305 "rcub/%d", rnp_index); 1306 "rcub/%d", rnp_index);
1306 if (IS_ERR(t)) 1307 if (IS_ERR(t))
1307 return PTR_ERR(t); 1308 return PTR_ERR(t);
1308 raw_spin_lock_irqsave(&rnp->lock, flags); 1309 raw_spin_lock_irqsave(&rnp->lock, flags);
1309 rnp->boost_kthread_task = t; 1310 rnp->boost_kthread_task = t;
1310 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1311 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1311 sp.sched_priority = RCU_BOOST_PRIO; 1312 sp.sched_priority = RCU_BOOST_PRIO;
1312 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1313 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1313 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1314 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1314 return 0; 1315 return 0;
1315 } 1316 }
1316 1317
1317 static void rcu_kthread_do_work(void) 1318 static void rcu_kthread_do_work(void)
1318 { 1319 {
1319 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); 1320 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
1320 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1321 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1321 rcu_preempt_do_callbacks(); 1322 rcu_preempt_do_callbacks();
1322 } 1323 }
1323 1324
1324 static void rcu_cpu_kthread_setup(unsigned int cpu) 1325 static void rcu_cpu_kthread_setup(unsigned int cpu)
1325 { 1326 {
1326 struct sched_param sp; 1327 struct sched_param sp;
1327 1328
1328 sp.sched_priority = RCU_KTHREAD_PRIO; 1329 sp.sched_priority = RCU_KTHREAD_PRIO;
1329 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1330 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1330 } 1331 }
1331 1332
1332 static void rcu_cpu_kthread_park(unsigned int cpu) 1333 static void rcu_cpu_kthread_park(unsigned int cpu)
1333 { 1334 {
1334 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1335 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1335 } 1336 }
1336 1337
1337 static int rcu_cpu_kthread_should_run(unsigned int cpu) 1338 static int rcu_cpu_kthread_should_run(unsigned int cpu)
1338 { 1339 {
1339 return __get_cpu_var(rcu_cpu_has_work); 1340 return __get_cpu_var(rcu_cpu_has_work);
1340 } 1341 }
1341 1342
1342 /* 1343 /*
1343 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1344 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1344 * RCU softirq used in flavors and configurations of RCU that do not 1345 * RCU softirq used in flavors and configurations of RCU that do not
1345 * support RCU priority boosting. 1346 * support RCU priority boosting.
1346 */ 1347 */
1347 static void rcu_cpu_kthread(unsigned int cpu) 1348 static void rcu_cpu_kthread(unsigned int cpu)
1348 { 1349 {
1349 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); 1350 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
1350 char work, *workp = &__get_cpu_var(rcu_cpu_has_work); 1351 char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
1351 int spincnt; 1352 int spincnt;
1352 1353
1353 for (spincnt = 0; spincnt < 10; spincnt++) { 1354 for (spincnt = 0; spincnt < 10; spincnt++) {
1354 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1355 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1355 local_bh_disable(); 1356 local_bh_disable();
1356 *statusp = RCU_KTHREAD_RUNNING; 1357 *statusp = RCU_KTHREAD_RUNNING;
1357 this_cpu_inc(rcu_cpu_kthread_loops); 1358 this_cpu_inc(rcu_cpu_kthread_loops);
1358 local_irq_disable(); 1359 local_irq_disable();
1359 work = *workp; 1360 work = *workp;
1360 *workp = 0; 1361 *workp = 0;
1361 local_irq_enable(); 1362 local_irq_enable();
1362 if (work) 1363 if (work)
1363 rcu_kthread_do_work(); 1364 rcu_kthread_do_work();
1364 local_bh_enable(); 1365 local_bh_enable();
1365 if (*workp == 0) { 1366 if (*workp == 0) {
1366 trace_rcu_utilization("End CPU kthread@rcu_wait"); 1367 trace_rcu_utilization("End CPU kthread@rcu_wait");
1367 *statusp = RCU_KTHREAD_WAITING; 1368 *statusp = RCU_KTHREAD_WAITING;
1368 return; 1369 return;
1369 } 1370 }
1370 } 1371 }
1371 *statusp = RCU_KTHREAD_YIELDING; 1372 *statusp = RCU_KTHREAD_YIELDING;
1372 trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1373 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1373 schedule_timeout_interruptible(2); 1374 schedule_timeout_interruptible(2);
1374 trace_rcu_utilization("End CPU kthread@rcu_yield"); 1375 trace_rcu_utilization("End CPU kthread@rcu_yield");
1375 *statusp = RCU_KTHREAD_WAITING; 1376 *statusp = RCU_KTHREAD_WAITING;
1376 } 1377 }
1377 1378
1378 /* 1379 /*
1379 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1380 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1380 * served by the rcu_node in question. The CPU hotplug lock is still 1381 * served by the rcu_node in question. The CPU hotplug lock is still
1381 * held, so the value of rnp->qsmaskinit will be stable. 1382 * held, so the value of rnp->qsmaskinit will be stable.
1382 * 1383 *
1383 * We don't include outgoingcpu in the affinity set; use -1 if there is 1384 * We don't include outgoingcpu in the affinity set; use -1 if there is
1384 * no outgoing CPU. If there are no CPUs left in the affinity set, 1385 * no outgoing CPU. If there are no CPUs left in the affinity set,
1385 * this function allows the kthread to execute on any CPU. 1386 * this function allows the kthread to execute on any CPU.
1386 */ 1387 */
1387 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1388 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1388 { 1389 {
1389 struct task_struct *t = rnp->boost_kthread_task; 1390 struct task_struct *t = rnp->boost_kthread_task;
1390 unsigned long mask = rnp->qsmaskinit; 1391 unsigned long mask = rnp->qsmaskinit;
1391 cpumask_var_t cm; 1392 cpumask_var_t cm;
1392 int cpu; 1393 int cpu;
1393 1394
1394 if (!t) 1395 if (!t)
1395 return; 1396 return;
1396 if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) 1397 if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
1397 return; 1398 return;
1398 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1399 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1399 if ((mask & 0x1) && cpu != outgoingcpu) 1400 if ((mask & 0x1) && cpu != outgoingcpu)
1400 cpumask_set_cpu(cpu, cm); 1401 cpumask_set_cpu(cpu, cm);
1401 if (cpumask_weight(cm) == 0) { 1402 if (cpumask_weight(cm) == 0) {
1402 cpumask_setall(cm); 1403 cpumask_setall(cm);
1403 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) 1404 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1404 cpumask_clear_cpu(cpu, cm); 1405 cpumask_clear_cpu(cpu, cm);
1405 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1406 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1406 } 1407 }
1407 set_cpus_allowed_ptr(t, cm); 1408 set_cpus_allowed_ptr(t, cm);
1408 free_cpumask_var(cm); 1409 free_cpumask_var(cm);
1409 } 1410 }
1410 1411
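rcu_boost_kthread_setaffinity() walks the rnp->qsmaskinit bitmask, where bit N corresponds to CPU rnp->grplo + N, and skips the outgoing CPU. Here is a self-contained sketch of just that bit-to-CPU translation; the grplo/grphi, qsmaskinit, and outgoingcpu values are made up for illustration.

#include <stdio.h>

int main(void)
{
	/* Toy stand-ins for rnp->grplo, rnp->grphi, rnp->qsmaskinit and
	 * the outgoing CPU; values are illustrative only. */
	int grplo = 8, grphi = 15;
	unsigned long qsmaskinit = 0xb5;	/* CPUs 8, 10, 12, 13, 15 */
	int outgoingcpu = 12;
	unsigned long mask = qsmaskinit;
	int cpu;

	for (cpu = grplo; cpu <= grphi; cpu++, mask >>= 1)
		if ((mask & 0x1) && cpu != outgoingcpu)
			printf("boost kthread may run on CPU %d\n", cpu);
	return 0;
}
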
1411 static struct smp_hotplug_thread rcu_cpu_thread_spec = { 1412 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1412 .store = &rcu_cpu_kthread_task, 1413 .store = &rcu_cpu_kthread_task,
1413 .thread_should_run = rcu_cpu_kthread_should_run, 1414 .thread_should_run = rcu_cpu_kthread_should_run,
1414 .thread_fn = rcu_cpu_kthread, 1415 .thread_fn = rcu_cpu_kthread,
1415 .thread_comm = "rcuc/%u", 1416 .thread_comm = "rcuc/%u",
1416 .setup = rcu_cpu_kthread_setup, 1417 .setup = rcu_cpu_kthread_setup,
1417 .park = rcu_cpu_kthread_park, 1418 .park = rcu_cpu_kthread_park,
1418 }; 1419 };
1419 1420
1420 /* 1421 /*
1421 * Spawn all kthreads -- called as soon as the scheduler is running. 1422 * Spawn all kthreads -- called as soon as the scheduler is running.
1422 */ 1423 */
1423 static int __init rcu_spawn_kthreads(void) 1424 static int __init rcu_spawn_kthreads(void)
1424 { 1425 {
1425 struct rcu_node *rnp; 1426 struct rcu_node *rnp;
1426 int cpu; 1427 int cpu;
1427 1428
1428 rcu_scheduler_fully_active = 1; 1429 rcu_scheduler_fully_active = 1;
1429 for_each_possible_cpu(cpu) 1430 for_each_possible_cpu(cpu)
1430 per_cpu(rcu_cpu_has_work, cpu) = 0; 1431 per_cpu(rcu_cpu_has_work, cpu) = 0;
1431 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1432 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1432 rnp = rcu_get_root(rcu_state); 1433 rnp = rcu_get_root(rcu_state);
1433 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1434 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1434 if (NUM_RCU_NODES > 1) { 1435 if (NUM_RCU_NODES > 1) {
1435 rcu_for_each_leaf_node(rcu_state, rnp) 1436 rcu_for_each_leaf_node(rcu_state, rnp)
1436 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1437 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1437 } 1438 }
1438 return 0; 1439 return 0;
1439 } 1440 }
1440 early_initcall(rcu_spawn_kthreads); 1441 early_initcall(rcu_spawn_kthreads);
1441 1442
1442 static void __cpuinit rcu_prepare_kthreads(int cpu) 1443 static void __cpuinit rcu_prepare_kthreads(int cpu)
1443 { 1444 {
1444 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1445 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1445 struct rcu_node *rnp = rdp->mynode; 1446 struct rcu_node *rnp = rdp->mynode;
1446 1447
1447 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1448 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1448 if (rcu_scheduler_fully_active) 1449 if (rcu_scheduler_fully_active)
1449 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1450 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1450 } 1451 }
1451 1452
1452 #else /* #ifdef CONFIG_RCU_BOOST */ 1453 #else /* #ifdef CONFIG_RCU_BOOST */
1453 1454
1454 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1455 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1455 { 1456 {
1456 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1457 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1457 } 1458 }
1458 1459
1459 static void invoke_rcu_callbacks_kthread(void) 1460 static void invoke_rcu_callbacks_kthread(void)
1460 { 1461 {
1461 WARN_ON_ONCE(1); 1462 WARN_ON_ONCE(1);
1462 } 1463 }
1463 1464
1464 static bool rcu_is_callbacks_kthread(void) 1465 static bool rcu_is_callbacks_kthread(void)
1465 { 1466 {
1466 return false; 1467 return false;
1467 } 1468 }
1468 1469
1469 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1470 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1470 { 1471 {
1471 } 1472 }
1472 1473
1473 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1474 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1474 { 1475 {
1475 } 1476 }
1476 1477
1477 static int __init rcu_scheduler_really_started(void) 1478 static int __init rcu_scheduler_really_started(void)
1478 { 1479 {
1479 rcu_scheduler_fully_active = 1; 1480 rcu_scheduler_fully_active = 1;
1480 return 0; 1481 return 0;
1481 } 1482 }
1482 early_initcall(rcu_scheduler_really_started); 1483 early_initcall(rcu_scheduler_really_started);
1483 1484
1484 static void __cpuinit rcu_prepare_kthreads(int cpu) 1485 static void __cpuinit rcu_prepare_kthreads(int cpu)
1485 { 1486 {
1486 } 1487 }
1487 1488
1488 #endif /* #else #ifdef CONFIG_RCU_BOOST */ 1489 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1489 1490
1490 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1491 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1491 1492
1492 /* 1493 /*
1493 * Check to see if any future RCU-related work will need to be done 1494 * Check to see if any future RCU-related work will need to be done
1494 * by the current CPU, even if none need be done immediately, returning 1495 * by the current CPU, even if none need be done immediately, returning
1495 * 1 if so. This function is part of the RCU implementation; it is -not- 1496 * 1 if so. This function is part of the RCU implementation; it is -not-
1496 * an exported member of the RCU API. 1497 * an exported member of the RCU API.
1497 * 1498 *
1498 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1499 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1499 * any flavor of RCU. 1500 * any flavor of RCU.
1500 */ 1501 */
1501 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 1502 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1502 { 1503 {
1503 *delta_jiffies = ULONG_MAX; 1504 *delta_jiffies = ULONG_MAX;
1504 return rcu_cpu_has_callbacks(cpu); 1505 return rcu_cpu_has_callbacks(cpu);
1505 } 1506 }
1506 1507
1507 /* 1508 /*
1508 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. 1509 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
1509 */ 1510 */
1510 static void rcu_prepare_for_idle_init(int cpu) 1511 static void rcu_prepare_for_idle_init(int cpu)
1511 { 1512 {
1512 } 1513 }
1513 1514
1514 /* 1515 /*
1515 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up 1516 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1516 * after it. 1517 * after it.
1517 */ 1518 */
1518 static void rcu_cleanup_after_idle(int cpu) 1519 static void rcu_cleanup_after_idle(int cpu)
1519 { 1520 {
1520 } 1521 }
1521 1522
1522 /* 1523 /*
1523 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, 1524 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1524 * is nothing. 1525 * is nothing.
1525 */ 1526 */
1526 static void rcu_prepare_for_idle(int cpu) 1527 static void rcu_prepare_for_idle(int cpu)
1527 { 1528 {
1528 } 1529 }
1529 1530
1530 /* 1531 /*
1531 * Don't bother keeping a running count of the number of RCU callbacks 1532 * Don't bother keeping a running count of the number of RCU callbacks
1532 * posted because CONFIG_RCU_FAST_NO_HZ=n. 1533 * posted because CONFIG_RCU_FAST_NO_HZ=n.
1533 */ 1534 */
1534 static void rcu_idle_count_callbacks_posted(void) 1535 static void rcu_idle_count_callbacks_posted(void)
1535 { 1536 {
1536 } 1537 }
1537 1538
1538 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1539 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1539 1540
1540 /* 1541 /*
1541 * This code is invoked when a CPU goes idle, at which point we want 1542 * This code is invoked when a CPU goes idle, at which point we want
1542 * to have the CPU do everything required for RCU so that it can enter 1543 * to have the CPU do everything required for RCU so that it can enter
1543 * the energy-efficient dyntick-idle mode. This is handled by a 1544 * the energy-efficient dyntick-idle mode. This is handled by a
1544 * state machine implemented by rcu_prepare_for_idle() below. 1545 * state machine implemented by rcu_prepare_for_idle() below.
1545 * 1546 *
1546 * The following four preprocessor symbols control this state machine: 1547 * The following four preprocessor symbols control this state machine:
1547 * 1548 *
1548 * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt 1549 * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
1549 * to satisfy RCU. Beyond this point, it is better to incur a periodic 1550 * to satisfy RCU. Beyond this point, it is better to incur a periodic
1550 * scheduling-clock interrupt than to loop through the state machine 1551 * scheduling-clock interrupt than to loop through the state machine
1551 * at full power. 1552 * at full power.
1552 * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are 1553 * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
1553 * optional if RCU does not need anything immediately from this 1554 * optional if RCU does not need anything immediately from this
1554 * CPU, even if this CPU still has RCU callbacks queued. The first 1555 * CPU, even if this CPU still has RCU callbacks queued. The first
1555 * RCU_IDLE_FLUSHES - RCU_IDLE_OPT_FLUSHES passes are mandatory: we need to give 1556 * RCU_IDLE_FLUSHES - RCU_IDLE_OPT_FLUSHES passes are mandatory: we need to give
1556 * the state machine a chance to communicate a quiescent state 1557 * the state machine a chance to communicate a quiescent state
1557 * to the RCU core. 1558 * to the RCU core.
1558 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted 1559 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
1559 * to sleep in dyntick-idle mode with RCU callbacks pending. This 1560 * to sleep in dyntick-idle mode with RCU callbacks pending. This
1560 * is sized to be roughly one RCU grace period. Those energy-efficiency 1561 * is sized to be roughly one RCU grace period. Those energy-efficiency
1561 * benchmarkers who might otherwise be tempted to set this to a large 1562 * benchmarkers who might otherwise be tempted to set this to a large
1562 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your 1563 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
1563 * system. And if you are -that- concerned about energy efficiency, 1564 * system. And if you are -that- concerned about energy efficiency,
1564 * just power the system down and be done with it! 1565 * just power the system down and be done with it!
1565 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is 1566 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
1566 * permitted to sleep in dyntick-idle mode with only lazy RCU 1567 * permitted to sleep in dyntick-idle mode with only lazy RCU
1567 * callbacks pending. Setting this too high can OOM your system. 1568 * callbacks pending. Setting this too high can OOM your system.
1568 * 1569 *
1569 * The values below work well in practice. If future workloads require 1570 * The values below work well in practice. If future workloads require
1570 * adjustment, they can be converted into kernel config parameters, though 1571 * adjustment, they can be converted into kernel config parameters, though
1571 * making the state machine smarter might be a better option. 1572 * making the state machine smarter might be a better option.
1572 */ 1573 */
1573 #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ 1574 #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
1574 #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ 1575 #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
1575 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ 1576 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1576 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1577 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1577 1578
1578 extern int tick_nohz_enabled; 1579 extern int tick_nohz_enabled;
1579 1580
1580 /* 1581 /*
1581 * Does the specified flavor of RCU have non-lazy callbacks pending on 1582 * Does the specified flavor of RCU have non-lazy callbacks pending on
1582 * the specified CPU? Both RCU flavor and CPU are specified by the 1583 * the specified CPU? Both RCU flavor and CPU are specified by the
1583 * rcu_data structure. 1584 * rcu_data structure.
1584 */ 1585 */
1585 static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) 1586 static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
1586 { 1587 {
1587 return rdp->qlen != rdp->qlen_lazy; 1588 return rdp->qlen != rdp->qlen_lazy;
1588 } 1589 }
1589 1590
1590 #ifdef CONFIG_TREE_PREEMPT_RCU 1591 #ifdef CONFIG_TREE_PREEMPT_RCU
1591 1592
1592 /* 1593 /*
1593 * Are there non-lazy RCU-preempt callbacks? (There cannot be if there 1594 * Are there non-lazy RCU-preempt callbacks? (There cannot be if there
1594 * is no RCU-preempt in the kernel.) 1595 * is no RCU-preempt in the kernel.)
1595 */ 1596 */
1596 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) 1597 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
1597 { 1598 {
1598 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 1599 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
1599 1600
1600 return __rcu_cpu_has_nonlazy_callbacks(rdp); 1601 return __rcu_cpu_has_nonlazy_callbacks(rdp);
1601 } 1602 }
1602 1603
1603 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1604 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1604 1605
1605 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) 1606 static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
1606 { 1607 {
1607 return false; 1608 return false;
1608 } 1609 }
1609 1610
1610 #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1611 #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
1611 1612
1612 /* 1613 /*
1613 * Does any flavor of RCU have non-lazy callbacks on the specified CPU? 1614 * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
1614 */ 1615 */
1615 static bool rcu_cpu_has_nonlazy_callbacks(int cpu) 1616 static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
1616 { 1617 {
1617 return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || 1618 return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
1618 __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || 1619 __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
1619 rcu_preempt_cpu_has_nonlazy_callbacks(cpu); 1620 rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
1620 } 1621 }
1621 1622
1622 /* 1623 /*
1623 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no 1624 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
1624 * callbacks on this CPU, (2) this CPU has not yet attempted to enter 1625 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1625 * dyntick-idle mode, or (3) this CPU is in the process of attempting to 1626 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1626 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed 1627 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
1627 * to enter dyntick-idle mode, we refuse to try to enter it. After all, 1628 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
1628 * it is better to incur scheduling-clock interrupts than to spin 1629 * it is better to incur scheduling-clock interrupts than to spin
1629 * continuously for the same time duration! 1630 * continuously for the same time duration!
1630 * 1631 *
1631 * The delta_jiffies argument is used to store the number of jiffies 1632 * The delta_jiffies argument is used to store the number of jiffies
1632 * until RCU will next need this CPU if it still has callbacks. The reason 1633 * until RCU will next need this CPU if it still has callbacks. The reason
1633 * for this is that rcu_prepare_for_idle() might need to post a timer, 1634 * for this is that rcu_prepare_for_idle() might need to post a timer,
1634 * but if so, it will do so after tick_nohz_stop_sched_tick() has set 1635 * but if so, it will do so after tick_nohz_stop_sched_tick() has set
1635 * the wakeup time for this CPU. This means that RCU's timer can be 1636 * the wakeup time for this CPU. This means that RCU's timer can be
1636 * delayed until the wakeup time, which defeats the purpose of posting 1637 * delayed until the wakeup time, which defeats the purpose of posting
1637 * a timer. 1638 * a timer.
1638 */ 1639 */
1639 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 1640 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1640 { 1641 {
1641 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1642 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1642 1643
1643 /* Flag a new idle sojourn to the idle-entry state machine. */ 1644 /* Flag a new idle sojourn to the idle-entry state machine. */
1644 rdtp->idle_first_pass = 1; 1645 rdtp->idle_first_pass = 1;
1645 /* If no callbacks, RCU doesn't need the CPU. */ 1646 /* If no callbacks, RCU doesn't need the CPU. */
1646 if (!rcu_cpu_has_callbacks(cpu)) { 1647 if (!rcu_cpu_has_callbacks(cpu)) {
1647 *delta_jiffies = ULONG_MAX; 1648 *delta_jiffies = ULONG_MAX;
1648 return 0; 1649 return 0;
1649 } 1650 }
1650 if (rdtp->dyntick_holdoff == jiffies) { 1651 if (rdtp->dyntick_holdoff == jiffies) {
1651 /* RCU recently tried and failed, so don't try again. */ 1652 /* RCU recently tried and failed, so don't try again. */
1652 *delta_jiffies = 1; 1653 *delta_jiffies = 1;
1653 return 1; 1654 return 1;
1654 } 1655 }
1655 /* Set up for the possibility that RCU will post a timer. */ 1656 /* Set up for the possibility that RCU will post a timer. */
1656 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 1657 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
1657 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, 1658 *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
1658 RCU_IDLE_GP_DELAY) - jiffies; 1659 RCU_IDLE_GP_DELAY) - jiffies;
1659 } else { 1660 } else {
1660 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; 1661 *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
1661 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; 1662 *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
1662 } 1663 }
1663 return 0; 1664 return 0;
1664 } 1665 }
1665 1666
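In the non-lazy case above, the wakeup is rounded up to a multiple of RCU_IDLE_GP_DELAY jiffies, so timers posted by different CPUs at around the same time tend to expire on the same jiffy and can share a wakeup. The following stand-alone sketch shows only that arithmetic; round_up() here is a local stand-in for the kernel macro and assumes a power-of-two alignment, which RCU_IDLE_GP_DELAY is.

#include <stdio.h>

#define RCU_IDLE_GP_DELAY 4	/* roughly one grace period, in jiffies */

/* Local stand-in for the kernel's round_up(); valid for power-of-two y. */
static unsigned long round_up(unsigned long x, unsigned long y)
{
	return (x + y - 1) & ~(y - 1);
}

int main(void)
{
	unsigned long jiffies;

	for (jiffies = 1000; jiffies < 1005; jiffies++) {
		unsigned long dj = round_up(RCU_IDLE_GP_DELAY + jiffies,
					    RCU_IDLE_GP_DELAY) - jiffies;
		printf("jiffies=%lu: wake after %lu jiffies (at %lu)\n",
		       jiffies, dj, jiffies + dj);
	}
	return 0;
}

Note how adjacent starting values of jiffies map to the same absolute expiry, which is what allows multiple CPUs to be awakened together.
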
1666 /* 1667 /*
1667 * Handler for smp_call_function_single(). The only point of this 1668 * Handler for smp_call_function_single(). The only point of this
1668 * handler is to wake the CPU up, so the handler does only tracing. 1669 * handler is to wake the CPU up, so the handler does only tracing.
1669 */ 1670 */
1670 void rcu_idle_demigrate(void *unused) 1671 void rcu_idle_demigrate(void *unused)
1671 { 1672 {
1672 trace_rcu_prep_idle("Demigrate"); 1673 trace_rcu_prep_idle("Demigrate");
1673 } 1674 }
1674 1675
1675 /* 1676 /*
1676 * Timer handler used to force CPU to start pushing its remaining RCU 1677 * Timer handler used to force CPU to start pushing its remaining RCU
1677 * callbacks in the case where it entered dyntick-idle mode with callbacks 1678 * callbacks in the case where it entered dyntick-idle mode with callbacks
1678 * pending. The handler doesn't really need to do anything because the 1679 * pending. The handler doesn't really need to do anything because the
1679 * real work is done upon re-entry to idle, or by the next scheduling-clock 1680 * real work is done upon re-entry to idle, or by the next scheduling-clock
1680 * interrupt should idle not be re-entered. 1681 * interrupt should idle not be re-entered.
1681 * 1682 *
1682 * One special case: the timer gets migrated without awakening the CPU 1683 * One special case: the timer gets migrated without awakening the CPU
1683 * on which the timer was scheduled. In this case, we must wake up 1684 * on which the timer was scheduled. In this case, we must wake up
1684 * that CPU. We do so with smp_call_function_single(). 1685 * that CPU. We do so with smp_call_function_single().
1685 */ 1686 */
1686 static void rcu_idle_gp_timer_func(unsigned long cpu_in) 1687 static void rcu_idle_gp_timer_func(unsigned long cpu_in)
1687 { 1688 {
1688 int cpu = (int)cpu_in; 1689 int cpu = (int)cpu_in;
1689 1690
1690 trace_rcu_prep_idle("Timer"); 1691 trace_rcu_prep_idle("Timer");
1691 if (cpu != smp_processor_id()) 1692 if (cpu != smp_processor_id())
1692 smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); 1693 smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
1693 else 1694 else
1694 WARN_ON_ONCE(1); /* Getting here can hang the system... */ 1695 WARN_ON_ONCE(1); /* Getting here can hang the system... */
1695 } 1696 }
1696 1697
1697 /* 1698 /*
1698 * Initialize the timer used to pull CPUs out of dyntick-idle mode. 1699 * Initialize the timer used to pull CPUs out of dyntick-idle mode.
1699 */ 1700 */
1700 static void rcu_prepare_for_idle_init(int cpu) 1701 static void rcu_prepare_for_idle_init(int cpu)
1701 { 1702 {
1702 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1703 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1703 1704
1704 rdtp->dyntick_holdoff = jiffies - 1; 1705 rdtp->dyntick_holdoff = jiffies - 1;
1705 setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); 1706 setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
1706 rdtp->idle_gp_timer_expires = jiffies - 1; 1707 rdtp->idle_gp_timer_expires = jiffies - 1;
1707 rdtp->idle_first_pass = 1; 1708 rdtp->idle_first_pass = 1;
1708 } 1709 }
1709 1710
1710 /* 1711 /*
1711 * Clean up for exit from idle. Because we are exiting from idle, there 1712 * Clean up for exit from idle. Because we are exiting from idle, there
1712 * is no longer any point to ->idle_gp_timer, so cancel it. This will 1713 * is no longer any point to ->idle_gp_timer, so cancel it. This will
1713 * do nothing if this timer is not active, so just cancel it unconditionally. 1714 * do nothing if this timer is not active, so just cancel it unconditionally.
1714 */ 1715 */
1715 static void rcu_cleanup_after_idle(int cpu) 1716 static void rcu_cleanup_after_idle(int cpu)
1716 { 1717 {
1717 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1718 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1718 1719
1719 del_timer(&rdtp->idle_gp_timer); 1720 del_timer(&rdtp->idle_gp_timer);
1720 trace_rcu_prep_idle("Cleanup after idle"); 1721 trace_rcu_prep_idle("Cleanup after idle");
1721 rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); 1722 rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
1722 } 1723 }
1723 1724
1724 /* 1725 /*
1725 * Check to see if any RCU-related work can be done by the current CPU, 1726 * Check to see if any RCU-related work can be done by the current CPU,
1726 * and if so, schedule a softirq to get it done. This function is part 1727 * and if so, schedule a softirq to get it done. This function is part
1727 * of the RCU implementation; it is -not- an exported member of the RCU API. 1728 * of the RCU implementation; it is -not- an exported member of the RCU API.
1728 * 1729 *
1729 * The idea is for the current CPU to clear out all work required by the 1730 * The idea is for the current CPU to clear out all work required by the
1730 * RCU core for the current grace period, so that this CPU can be permitted 1731 * RCU core for the current grace period, so that this CPU can be permitted
1731 * to enter dyntick-idle mode. In some cases, it will need to be awakened 1732 * to enter dyntick-idle mode. In some cases, it will need to be awakened
1732 * at the end of the grace period by whatever CPU ends the grace period. 1733 * at the end of the grace period by whatever CPU ends the grace period.
1733 * This allows CPUs to go dyntick-idle more quickly, and to reduce the 1734 * This allows CPUs to go dyntick-idle more quickly, and to reduce the
1734 * number of wakeups by a modest integer factor. 1735 * number of wakeups by a modest integer factor.
1735 * 1736 *
1736 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1737 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1737 * disabled, we do one pass of force_quiescent_state(), then do an 1738 * disabled, we do one pass of force_quiescent_state(), then do an
1738 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked 1739 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
1739 * later. The ->dyntick_drain field controls the sequencing. 1740 * later. The ->dyntick_drain field controls the sequencing.
1740 * 1741 *
1741 * The caller must have disabled interrupts. 1742 * The caller must have disabled interrupts.
1742 */ 1743 */
1743 static void rcu_prepare_for_idle(int cpu) 1744 static void rcu_prepare_for_idle(int cpu)
1744 { 1745 {
1745 struct timer_list *tp; 1746 struct timer_list *tp;
1746 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1747 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1747 int tne; 1748 int tne;
1748 1749
1749 /* Handle nohz enablement switches conservatively. */ 1750 /* Handle nohz enablement switches conservatively. */
1750 tne = ACCESS_ONCE(tick_nohz_enabled); 1751 tne = ACCESS_ONCE(tick_nohz_enabled);
1751 if (tne != rdtp->tick_nohz_enabled_snap) { 1752 if (tne != rdtp->tick_nohz_enabled_snap) {
1752 if (rcu_cpu_has_callbacks(cpu)) 1753 if (rcu_cpu_has_callbacks(cpu))
1753 invoke_rcu_core(); /* force nohz to see update. */ 1754 invoke_rcu_core(); /* force nohz to see update. */
1754 rdtp->tick_nohz_enabled_snap = tne; 1755 rdtp->tick_nohz_enabled_snap = tne;
1755 return; 1756 return;
1756 } 1757 }
1757 if (!tne) 1758 if (!tne)
1758 return; 1759 return;
1759 1760
1760 /* Adaptive-tick mode, where usermode execution is idle to RCU. */ 1761 /* Adaptive-tick mode, where usermode execution is idle to RCU. */
1761 if (!is_idle_task(current)) { 1762 if (!is_idle_task(current)) {
1762 rdtp->dyntick_holdoff = jiffies - 1; 1763 rdtp->dyntick_holdoff = jiffies - 1;
1763 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 1764 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
1764 trace_rcu_prep_idle("User dyntick with callbacks"); 1765 trace_rcu_prep_idle("User dyntick with callbacks");
1765 rdtp->idle_gp_timer_expires = 1766 rdtp->idle_gp_timer_expires =
1766 round_up(jiffies + RCU_IDLE_GP_DELAY, 1767 round_up(jiffies + RCU_IDLE_GP_DELAY,
1767 RCU_IDLE_GP_DELAY); 1768 RCU_IDLE_GP_DELAY);
1768 } else if (rcu_cpu_has_callbacks(cpu)) { 1769 } else if (rcu_cpu_has_callbacks(cpu)) {
1769 rdtp->idle_gp_timer_expires = 1770 rdtp->idle_gp_timer_expires =
1770 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); 1771 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
1771 trace_rcu_prep_idle("User dyntick with lazy callbacks"); 1772 trace_rcu_prep_idle("User dyntick with lazy callbacks");
1772 } else { 1773 } else {
1773 return; 1774 return;
1774 } 1775 }
1775 tp = &rdtp->idle_gp_timer; 1776 tp = &rdtp->idle_gp_timer;
1776 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); 1777 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
1777 return; 1778 return;
1778 } 1779 }
1779 1780
1780 /* 1781 /*
1781 * If this is an idle re-entry, for example, due to use of 1782 * If this is an idle re-entry, for example, due to use of
1782 * RCU_NONIDLE() or the new idle-loop tracing API within the idle 1783 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
1783 * loop, then don't take any state-machine actions, unless the 1784 * loop, then don't take any state-machine actions, unless the
1784 * momentary exit from idle queued additional non-lazy callbacks. 1785 * momentary exit from idle queued additional non-lazy callbacks.
1785 * Instead, repost the ->idle_gp_timer if this CPU has callbacks 1786 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
1786 * pending. 1787 * pending.
1787 */ 1788 */
1788 if (!rdtp->idle_first_pass && 1789 if (!rdtp->idle_first_pass &&
1789 (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { 1790 (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
1790 if (rcu_cpu_has_callbacks(cpu)) { 1791 if (rcu_cpu_has_callbacks(cpu)) {
1791 tp = &rdtp->idle_gp_timer; 1792 tp = &rdtp->idle_gp_timer;
1792 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); 1793 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
1793 } 1794 }
1794 return; 1795 return;
1795 } 1796 }
1796 rdtp->idle_first_pass = 0; 1797 rdtp->idle_first_pass = 0;
1797 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; 1798 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
1798 1799
1799 /* 1800 /*
1800 * If there are no callbacks on this CPU, enter dyntick-idle mode. 1801 * If there are no callbacks on this CPU, enter dyntick-idle mode.
1801 * Also reset state to avoid prejudicing later attempts. 1802 * Also reset state to avoid prejudicing later attempts.
1802 */ 1803 */
1803 if (!rcu_cpu_has_callbacks(cpu)) { 1804 if (!rcu_cpu_has_callbacks(cpu)) {
1804 rdtp->dyntick_holdoff = jiffies - 1; 1805 rdtp->dyntick_holdoff = jiffies - 1;
1805 rdtp->dyntick_drain = 0; 1806 rdtp->dyntick_drain = 0;
1806 trace_rcu_prep_idle("No callbacks"); 1807 trace_rcu_prep_idle("No callbacks");
1807 return; 1808 return;
1808 } 1809 }
1809 1810
1810 /* 1811 /*
1811 * If in holdoff mode, just return. We will presumably have 1812 * If in holdoff mode, just return. We will presumably have
1812 * refrained from disabling the scheduling-clock tick. 1813 * refrained from disabling the scheduling-clock tick.
1813 */ 1814 */
1814 if (rdtp->dyntick_holdoff == jiffies) { 1815 if (rdtp->dyntick_holdoff == jiffies) {
1815 trace_rcu_prep_idle("In holdoff"); 1816 trace_rcu_prep_idle("In holdoff");
1816 return; 1817 return;
1817 } 1818 }
1818 1819
1819 /* Check and update the ->dyntick_drain sequencing. */ 1820 /* Check and update the ->dyntick_drain sequencing. */
1820 if (rdtp->dyntick_drain <= 0) { 1821 if (rdtp->dyntick_drain <= 0) {
1821 /* First time through, initialize the counter. */ 1822 /* First time through, initialize the counter. */
1822 rdtp->dyntick_drain = RCU_IDLE_FLUSHES; 1823 rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
1823 } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && 1824 } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
1824 !rcu_pending(cpu) && 1825 !rcu_pending(cpu) &&
1825 !local_softirq_pending()) { 1826 !local_softirq_pending()) {
1826 /* Can we go dyntick-idle despite still having callbacks? */ 1827 /* Can we go dyntick-idle despite still having callbacks? */
1827 rdtp->dyntick_drain = 0; 1828 rdtp->dyntick_drain = 0;
1828 rdtp->dyntick_holdoff = jiffies; 1829 rdtp->dyntick_holdoff = jiffies;
1829 if (rcu_cpu_has_nonlazy_callbacks(cpu)) { 1830 if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
1830 trace_rcu_prep_idle("Dyntick with callbacks"); 1831 trace_rcu_prep_idle("Dyntick with callbacks");
1831 rdtp->idle_gp_timer_expires = 1832 rdtp->idle_gp_timer_expires =
1832 round_up(jiffies + RCU_IDLE_GP_DELAY, 1833 round_up(jiffies + RCU_IDLE_GP_DELAY,
1833 RCU_IDLE_GP_DELAY); 1834 RCU_IDLE_GP_DELAY);
1834 } else { 1835 } else {
1835 rdtp->idle_gp_timer_expires = 1836 rdtp->idle_gp_timer_expires =
1836 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); 1837 round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
1837 trace_rcu_prep_idle("Dyntick with lazy callbacks"); 1838 trace_rcu_prep_idle("Dyntick with lazy callbacks");
1838 } 1839 }
1839 tp = &rdtp->idle_gp_timer; 1840 tp = &rdtp->idle_gp_timer;
1840 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); 1841 mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
1841 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1842 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1842 return; /* Nothing more to do immediately. */ 1843 return; /* Nothing more to do immediately. */
1843 } else if (--(rdtp->dyntick_drain) <= 0) { 1844 } else if (--(rdtp->dyntick_drain) <= 0) {
1844 /* We have hit the limit, so time to give up. */ 1845 /* We have hit the limit, so time to give up. */
1845 rdtp->dyntick_holdoff = jiffies; 1846 rdtp->dyntick_holdoff = jiffies;
1846 trace_rcu_prep_idle("Begin holdoff"); 1847 trace_rcu_prep_idle("Begin holdoff");
1847 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ 1848 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
1848 return; 1849 return;
1849 } 1850 }
1850 1851
1851 /* 1852 /*
1852 * Do one step of pushing the remaining RCU callbacks through 1853 * Do one step of pushing the remaining RCU callbacks through
1853 * the RCU core state machine. 1854 * the RCU core state machine.
1854 */ 1855 */
1855 #ifdef CONFIG_TREE_PREEMPT_RCU 1856 #ifdef CONFIG_TREE_PREEMPT_RCU
1856 if (per_cpu(rcu_preempt_data, cpu).nxtlist) { 1857 if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
1857 rcu_preempt_qs(cpu); 1858 rcu_preempt_qs(cpu);
1858 force_quiescent_state(&rcu_preempt_state); 1859 force_quiescent_state(&rcu_preempt_state);
1859 } 1860 }
1860 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1861 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1861 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 1862 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
1862 rcu_sched_qs(cpu); 1863 rcu_sched_qs(cpu);
1863 force_quiescent_state(&rcu_sched_state); 1864 force_quiescent_state(&rcu_sched_state);
1864 } 1865 }
1865 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 1866 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
1866 rcu_bh_qs(cpu); 1867 rcu_bh_qs(cpu);
1867 force_quiescent_state(&rcu_bh_state); 1868 force_quiescent_state(&rcu_bh_state);
1868 } 1869 }
1869 1870
1870 /* 1871 /*
1871 * If RCU callbacks are still pending, RCU still needs this CPU. 1872 * If RCU callbacks are still pending, RCU still needs this CPU.
1872 * So try forcing the callbacks through the grace period. 1873 * So try forcing the callbacks through the grace period.
1873 */ 1874 */
1874 if (rcu_cpu_has_callbacks(cpu)) { 1875 if (rcu_cpu_has_callbacks(cpu)) {
1875 trace_rcu_prep_idle("More callbacks"); 1876 trace_rcu_prep_idle("More callbacks");
1876 invoke_rcu_core(); 1877 invoke_rcu_core();
1877 } else { 1878 } else {
1878 trace_rcu_prep_idle("Callbacks drained"); 1879 trace_rcu_prep_idle("Callbacks drained");
1879 } 1880 }
1880 } 1881 }
1881 1882
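The ->dyntick_drain sequencing above boils down to a small counter: arm it at RCU_IDLE_FLUSHES on the first pass, permit an early exit to dyntick-idle once it drops to RCU_IDLE_OPT_FLUSHES and nothing is pending, and begin holdoff when it reaches zero. The sketch below simulates only that counter with a made-up per-pass "still pending" schedule; it is an illustration of the sequencing, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define RCU_IDLE_FLUSHES	5
#define RCU_IDLE_OPT_FLUSHES	3

int main(void)
{
	/* Made-up per-pass answers standing in for rcu_pending() etc. */
	bool pending[] = { true, true, true, true, false };
	int dyntick_drain = 0;

	for (int pass = 0; pass < 5; pass++) {
		if (dyntick_drain <= 0) {
			/* First pass: arm the counter. */
			dyntick_drain = RCU_IDLE_FLUSHES;
		} else if (dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
			   !pending[pass]) {
			printf("pass %d: optional flush skipped, enter dyntick-idle\n", pass);
			break;
		} else if (--dyntick_drain <= 0) {
			printf("pass %d: limit hit, begin holdoff\n", pass);
			break;
		}
		printf("pass %d: push callbacks, drain now %d\n", pass, dyntick_drain);
	}
	return 0;
}
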
1882 /* 1883 /*
1883 * Keep a running count of the number of non-lazy callbacks posted 1884 * Keep a running count of the number of non-lazy callbacks posted
1884 * on this CPU. This running counter (which is never decremented) allows 1885 * on this CPU. This running counter (which is never decremented) allows
1885 * rcu_prepare_for_idle() to detect when something out of the idle loop 1886 * rcu_prepare_for_idle() to detect when something out of the idle loop
1886 * posts a callback, even if an equal number of callbacks are invoked. 1887 * posts a callback, even if an equal number of callbacks are invoked.
1887 * Of course, callbacks should only be posted from within a trace event 1888 * Of course, callbacks should only be posted from within a trace event
1888 * designed to be called from idle or from within RCU_NONIDLE(). 1889 * designed to be called from idle or from within RCU_NONIDLE().
1889 */ 1890 */
1890 static void rcu_idle_count_callbacks_posted(void) 1891 static void rcu_idle_count_callbacks_posted(void)
1891 { 1892 {
1892 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); 1893 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
1893 } 1894 }
1894 1895
1895 /* 1896 /*
1896 * Data for flushing lazy RCU callbacks at OOM time. 1897 * Data for flushing lazy RCU callbacks at OOM time.
1897 */ 1898 */
1898 static atomic_t oom_callback_count; 1899 static atomic_t oom_callback_count;
1899 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); 1900 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
1900 1901
1901 /* 1902 /*
1902 * RCU OOM callback -- decrement the outstanding count and deliver the 1903 * RCU OOM callback -- decrement the outstanding count and deliver the
1903 * wake-up if we are the last one. 1904 * wake-up if we are the last one.
1904 */ 1905 */
1905 static void rcu_oom_callback(struct rcu_head *rhp) 1906 static void rcu_oom_callback(struct rcu_head *rhp)
1906 { 1907 {
1907 if (atomic_dec_and_test(&oom_callback_count)) 1908 if (atomic_dec_and_test(&oom_callback_count))
1908 wake_up(&oom_callback_wq); 1909 wake_up(&oom_callback_wq);
1909 } 1910 }
1910 1911
1911 /* 1912 /*
1912 * Post an rcu_oom_notify callback on the current CPU if it has at 1913 * Post an rcu_oom_notify callback on the current CPU if it has at
1913 * least one lazy callback. This will unnecessarily post callbacks 1914 * least one lazy callback. This will unnecessarily post callbacks
1914 * to CPUs that already have a non-lazy callback at the end of their 1915 * to CPUs that already have a non-lazy callback at the end of their
1915 * callback list, but this is an infrequent operation, so accept some 1916 * callback list, but this is an infrequent operation, so accept some
1916 * extra overhead to keep things simple. 1917 * extra overhead to keep things simple.
1917 */ 1918 */
1918 static void rcu_oom_notify_cpu(void *unused) 1919 static void rcu_oom_notify_cpu(void *unused)
1919 { 1920 {
1920 struct rcu_state *rsp; 1921 struct rcu_state *rsp;
1921 struct rcu_data *rdp; 1922 struct rcu_data *rdp;
1922 1923
1923 for_each_rcu_flavor(rsp) { 1924 for_each_rcu_flavor(rsp) {
1924 rdp = __this_cpu_ptr(rsp->rda); 1925 rdp = __this_cpu_ptr(rsp->rda);
1925 if (rdp->qlen_lazy != 0) { 1926 if (rdp->qlen_lazy != 0) {
1926 atomic_inc(&oom_callback_count); 1927 atomic_inc(&oom_callback_count);
1927 rsp->call(&rdp->oom_head, rcu_oom_callback); 1928 rsp->call(&rdp->oom_head, rcu_oom_callback);
1928 } 1929 }
1929 } 1930 }
1930 } 1931 }
1931 1932
1932 /* 1933 /*
1933 * If low on memory, ensure that each CPU has a non-lazy callback. 1934 * If low on memory, ensure that each CPU has a non-lazy callback.
1934 * This will wake up CPUs that have only lazy callbacks, in turn 1935 * This will wake up CPUs that have only lazy callbacks, in turn
1935 * ensuring that they free up the corresponding memory in a timely manner. 1936 * ensuring that they free up the corresponding memory in a timely manner.
1936 * Because an uncertain amount of memory will be freed in some uncertain 1937 * Because an uncertain amount of memory will be freed in some uncertain
1937 * timeframe, we do not claim to have freed anything. 1938 * timeframe, we do not claim to have freed anything.
1938 */ 1939 */
1939 static int rcu_oom_notify(struct notifier_block *self, 1940 static int rcu_oom_notify(struct notifier_block *self,
1940 unsigned long notused, void *nfreed) 1941 unsigned long notused, void *nfreed)
1941 { 1942 {
1942 int cpu; 1943 int cpu;
1943 1944
1944 /* Wait for callbacks from earlier instance to complete. */ 1945 /* Wait for callbacks from earlier instance to complete. */
1945 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0); 1946 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
1946 1947
1947 /* 1948 /*
1948 * Prevent premature wakeup: ensure that all increments happen 1949 * Prevent premature wakeup: ensure that all increments happen
1949 * before there is a chance of the counter reaching zero. 1950 * before there is a chance of the counter reaching zero.
1950 */ 1951 */
1951 atomic_set(&oom_callback_count, 1); 1952 atomic_set(&oom_callback_count, 1);
1952 1953
1953 get_online_cpus(); 1954 get_online_cpus();
1954 for_each_online_cpu(cpu) { 1955 for_each_online_cpu(cpu) {
1955 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1956 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1956 cond_resched(); 1957 cond_resched();
1957 } 1958 }
1958 put_online_cpus(); 1959 put_online_cpus();
1959 1960
1960 /* Unconditionally decrement: no need to wake ourselves up. */ 1961 /* Unconditionally decrement: no need to wake ourselves up. */
1961 atomic_dec(&oom_callback_count); 1962 atomic_dec(&oom_callback_count);
1962 1963
1963 return NOTIFY_OK; 1964 return NOTIFY_OK;
1964 } 1965 }
1965 1966
1966 static struct notifier_block rcu_oom_nb = { 1967 static struct notifier_block rcu_oom_nb = {
1967 .notifier_call = rcu_oom_notify 1968 .notifier_call = rcu_oom_notify
1968 }; 1969 };
1969 1970
1970 static int __init rcu_register_oom_notifier(void) 1971 static int __init rcu_register_oom_notifier(void)
1971 { 1972 {
1972 register_oom_notifier(&rcu_oom_nb); 1973 register_oom_notifier(&rcu_oom_nb);
1973 return 0; 1974 return 0;
1974 } 1975 }
1975 early_initcall(rcu_register_oom_notifier); 1976 early_initcall(rcu_register_oom_notifier);
1976 1977
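rcu_oom_notify() biases oom_callback_count to 1 before posting any callbacks and drops that bias only after all CPUs have been visited, so the count cannot transiently hit zero while callbacks are still being posted. Below is a userspace sketch of the same bias-by-one completion-count pattern; the waiter here simply waits at the end rather than at the start of the next invocation, and every name in it is illustrative rather than a kernel interface.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int callback_count;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

/* Stand-in for rcu_oom_callback(): the last decrement delivers the wakeup. */
static void *callback(void *arg)
{
	(void)arg;
	if (atomic_fetch_sub(&callback_count, 1) == 1) {
		pthread_mutex_lock(&lock);
		pthread_cond_signal(&cond);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t tids[4];

	/* Bias the counter so it cannot reach zero while still posting. */
	atomic_store(&callback_count, 1);
	for (int i = 0; i < 4; i++) {
		atomic_fetch_add(&callback_count, 1);
		pthread_create(&tids[i], NULL, callback, NULL);
	}
	/* Drop the bias; only now can the count reach zero. */
	callback(NULL);

	pthread_mutex_lock(&lock);
	while (atomic_load(&callback_count) != 0)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	printf("all callbacks completed\n");

	for (int i = 0; i < 4; i++)
		pthread_join(tids[i], NULL);
	return 0;
}
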
1977 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1978 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1978 1979
1979 #ifdef CONFIG_RCU_CPU_STALL_INFO 1980 #ifdef CONFIG_RCU_CPU_STALL_INFO
1980 1981
1981 #ifdef CONFIG_RCU_FAST_NO_HZ 1982 #ifdef CONFIG_RCU_FAST_NO_HZ
1982 1983
1983 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1984 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1984 { 1985 {
1985 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1986 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1986 struct timer_list *tltp = &rdtp->idle_gp_timer; 1987 struct timer_list *tltp = &rdtp->idle_gp_timer;
1987 char c; 1988 char c;
1988 1989
1989 c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; 1990 c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
1990 if (timer_pending(tltp)) 1991 if (timer_pending(tltp))
1991 sprintf(cp, "drain=%d %c timer=%lu", 1992 sprintf(cp, "drain=%d %c timer=%lu",
1992 rdtp->dyntick_drain, c, tltp->expires - jiffies); 1993 rdtp->dyntick_drain, c, tltp->expires - jiffies);
1993 else 1994 else
1994 sprintf(cp, "drain=%d %c timer not pending", 1995 sprintf(cp, "drain=%d %c timer not pending",
1995 rdtp->dyntick_drain, c); 1996 rdtp->dyntick_drain, c);
1996 } 1997 }
1997 1998
1998 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 1999 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
1999 2000
2000 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 2001 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2001 { 2002 {
2002 *cp = '\0'; 2003 *cp = '\0';
2003 } 2004 }
2004 2005
2005 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 2006 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
2006 2007
2007 /* Initiate the stall-info list. */ 2008 /* Initiate the stall-info list. */
2008 static void print_cpu_stall_info_begin(void) 2009 static void print_cpu_stall_info_begin(void)
2009 { 2010 {
2010 printk(KERN_CONT "\n"); 2011 printk(KERN_CONT "\n");
2011 } 2012 }
2012 2013
2013 /* 2014 /*
2014 * Print out diagnostic information for the specified stalled CPU. 2015 * Print out diagnostic information for the specified stalled CPU.
2015 * 2016 *
2016 * If the specified CPU is aware of the current RCU grace period 2017 * If the specified CPU is aware of the current RCU grace period
2017 * (flavor specified by rsp), then print the number of scheduling 2018 * (flavor specified by rsp), then print the number of scheduling
2018 * clock interrupts the CPU has taken during the time that it has 2019 * clock interrupts the CPU has taken during the time that it has
2019 * been aware. Otherwise, print the number of RCU grace periods 2020 * been aware. Otherwise, print the number of RCU grace periods
2020 * that this CPU is ignorant of, for example, "1" if the CPU was 2021 * that this CPU is ignorant of, for example, "1" if the CPU was
2021 * aware of the previous grace period. 2022 * aware of the previous grace period.
2022 * 2023 *
2023 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. 2024 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
2024 */ 2025 */
2025 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 2026 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2026 { 2027 {
2027 char fast_no_hz[72]; 2028 char fast_no_hz[72];
2028 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2029 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2029 struct rcu_dynticks *rdtp = rdp->dynticks; 2030 struct rcu_dynticks *rdtp = rdp->dynticks;
2030 char *ticks_title; 2031 char *ticks_title;
2031 unsigned long ticks_value; 2032 unsigned long ticks_value;
2032 2033
2033 if (rsp->gpnum == rdp->gpnum) { 2034 if (rsp->gpnum == rdp->gpnum) {
2034 ticks_title = "ticks this GP"; 2035 ticks_title = "ticks this GP";
2035 ticks_value = rdp->ticks_this_gp; 2036 ticks_value = rdp->ticks_this_gp;
2036 } else { 2037 } else {
2037 ticks_title = "GPs behind"; 2038 ticks_title = "GPs behind";
2038 ticks_value = rsp->gpnum - rdp->gpnum; 2039 ticks_value = rsp->gpnum - rdp->gpnum;
2039 } 2040 }
2040 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 2041 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
2041 printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", 2042 printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
2042 cpu, ticks_value, ticks_title, 2043 cpu, ticks_value, ticks_title,
2043 atomic_read(&rdtp->dynticks) & 0xfff, 2044 atomic_read(&rdtp->dynticks) & 0xfff,
2044 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 2045 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
2045 fast_no_hz); 2046 fast_no_hz);
2046 } 2047 }
2047 2048
2048 /* Terminate the stall-info list. */ 2049 /* Terminate the stall-info list. */
2049 static void print_cpu_stall_info_end(void) 2050 static void print_cpu_stall_info_end(void)
2050 { 2051 {
2051 printk(KERN_ERR "\t"); 2052 printk(KERN_ERR "\t");
2052 } 2053 }
2053 2054
2054 /* Zero ->ticks_this_gp for all flavors of RCU. */ 2055 /* Zero ->ticks_this_gp for all flavors of RCU. */
2055 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 2056 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2056 { 2057 {
2057 rdp->ticks_this_gp = 0; 2058 rdp->ticks_this_gp = 0;
2058 } 2059 }
2059 2060
2060 /* Increment ->ticks_this_gp for all flavors of RCU. */ 2061 /* Increment ->ticks_this_gp for all flavors of RCU. */
2061 static void increment_cpu_stall_ticks(void) 2062 static void increment_cpu_stall_ticks(void)
2062 { 2063 {
2063 struct rcu_state *rsp; 2064 struct rcu_state *rsp;
2064 2065
2065 for_each_rcu_flavor(rsp) 2066 for_each_rcu_flavor(rsp)
2066 __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 2067 __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
2067 } 2068 }
2068 2069
2069 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 2070 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
2070 2071
2071 static void print_cpu_stall_info_begin(void) 2072 static void print_cpu_stall_info_begin(void)
2072 { 2073 {
2073 printk(KERN_CONT " {"); 2074 printk(KERN_CONT " {");
2074 } 2075 }
2075 2076
2076 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 2077 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2077 { 2078 {
2078 printk(KERN_CONT " %d", cpu); 2079 printk(KERN_CONT " %d", cpu);
2079 } 2080 }
2080 2081
2081 static void print_cpu_stall_info_end(void) 2082 static void print_cpu_stall_info_end(void)
2082 { 2083 {
2083 printk(KERN_CONT "} "); 2084 printk(KERN_CONT "} ");
2084 } 2085 }
2085 2086
2086 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 2087 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2087 { 2088 {
2088 } 2089 }
2089 2090
2090 static void increment_cpu_stall_ticks(void) 2091 static void increment_cpu_stall_ticks(void)
2091 { 2092 {
2092 } 2093 }
2093 2094
2094 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 2095 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
2095 2096