Commit 26845c2860cebebe6ce2d9d01ae3cb3db84b7e29

Authored by Paul E. McKenney
1 parent c68de2097a

rcu: print boot-time console messages if RCU configs out of ordinary

Print boot-time messages if tracing is enabled, if fanout is set
to non-default values, if exact fanout is specified, if accelerated
dyntick-idle grace periods have been enabled, if RCU-lockdep is enabled,
if rcutorture has been boot-time enabled, if the CPU stall detector has
been disabled, or if four-level hierarchy has been enabled.

This is all for TREE_RCU and TREE_PREEMPT_RCU.  TINY_RCU will be handled
separately, if at all.

Suggested-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Showing 2 changed files with 42 additions and 8 deletions Inline Diff

1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 * 23 *
24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 * 26 *
27 * For detailed explanation of Read-Copy Update mechanism see - 27 * For detailed explanation of Read-Copy Update mechanism see -
28 * Documentation/RCU 28 * Documentation/RCU
29 */ 29 */
30 #include <linux/types.h> 30 #include <linux/types.h>
31 #include <linux/kernel.h> 31 #include <linux/kernel.h>
32 #include <linux/init.h> 32 #include <linux/init.h>
33 #include <linux/spinlock.h> 33 #include <linux/spinlock.h>
34 #include <linux/smp.h> 34 #include <linux/smp.h>
35 #include <linux/rcupdate.h> 35 #include <linux/rcupdate.h>
36 #include <linux/interrupt.h> 36 #include <linux/interrupt.h>
37 #include <linux/sched.h> 37 #include <linux/sched.h>
38 #include <linux/nmi.h> 38 #include <linux/nmi.h>
39 #include <asm/atomic.h> 39 #include <asm/atomic.h>
40 #include <linux/bitops.h> 40 #include <linux/bitops.h>
41 #include <linux/module.h> 41 #include <linux/module.h>
42 #include <linux/completion.h> 42 #include <linux/completion.h>
43 #include <linux/moduleparam.h> 43 #include <linux/moduleparam.h>
44 #include <linux/percpu.h> 44 #include <linux/percpu.h>
45 #include <linux/notifier.h> 45 #include <linux/notifier.h>
46 #include <linux/cpu.h> 46 #include <linux/cpu.h>
47 #include <linux/mutex.h> 47 #include <linux/mutex.h>
48 #include <linux/time.h> 48 #include <linux/time.h>
49 #include <linux/kernel_stat.h> 49 #include <linux/kernel_stat.h>
50 50
51 #include "rcutree.h" 51 #include "rcutree.h"
52 52
53 /* Data structures. */ 53 /* Data structures. */
54 54
55 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 55 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
56 56
57 #define RCU_STATE_INITIALIZER(name) { \ 57 #define RCU_STATE_INITIALIZER(name) { \
58 .level = { &name.node[0] }, \ 58 .level = { &name.node[0] }, \
59 .levelcnt = { \ 59 .levelcnt = { \
60 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 60 NUM_RCU_LVL_0, /* root of hierarchy. */ \
61 NUM_RCU_LVL_1, \ 61 NUM_RCU_LVL_1, \
62 NUM_RCU_LVL_2, \ 62 NUM_RCU_LVL_2, \
63 NUM_RCU_LVL_3, \ 63 NUM_RCU_LVL_3, \
64 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ 64 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
65 }, \ 65 }, \
66 .signaled = RCU_GP_IDLE, \ 66 .signaled = RCU_GP_IDLE, \
67 .gpnum = -300, \ 67 .gpnum = -300, \
68 .completed = -300, \ 68 .completed = -300, \
69 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \ 69 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
70 .orphan_cbs_list = NULL, \ 70 .orphan_cbs_list = NULL, \
71 .orphan_cbs_tail = &name.orphan_cbs_list, \ 71 .orphan_cbs_tail = &name.orphan_cbs_list, \
72 .orphan_qlen = 0, \ 72 .orphan_qlen = 0, \
73 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \ 73 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
74 .n_force_qs = 0, \ 74 .n_force_qs = 0, \
75 .n_force_qs_ngp = 0, \ 75 .n_force_qs_ngp = 0, \
76 } 76 }
77 77
78 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); 78 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
79 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 79 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
80 80
81 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 81 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
82 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 82 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
83 83
84 int rcu_scheduler_active __read_mostly; 84 int rcu_scheduler_active __read_mostly;
85 EXPORT_SYMBOL_GPL(rcu_scheduler_active); 85 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
86 86
87 /* 87 /*
88 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 88 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
89 * permit this function to be invoked without holding the root rcu_node 89 * permit this function to be invoked without holding the root rcu_node
90 * structure's ->lock, but of course results can be subject to change. 90 * structure's ->lock, but of course results can be subject to change.
91 */ 91 */
92 static int rcu_gp_in_progress(struct rcu_state *rsp) 92 static int rcu_gp_in_progress(struct rcu_state *rsp)
93 { 93 {
94 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); 94 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
95 } 95 }
96 96
97 /* 97 /*
98 * Note a quiescent state. Because we do not need to know 98 * Note a quiescent state. Because we do not need to know
99 * how many quiescent states passed, just if there was at least 99 * how many quiescent states passed, just if there was at least
100 * one since the start of the grace period, this just sets a flag. 100 * one since the start of the grace period, this just sets a flag.
101 */ 101 */
102 void rcu_sched_qs(int cpu) 102 void rcu_sched_qs(int cpu)
103 { 103 {
104 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 104 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
105 105
106 rdp->passed_quiesc_completed = rdp->gpnum - 1; 106 rdp->passed_quiesc_completed = rdp->gpnum - 1;
107 barrier(); 107 barrier();
108 rdp->passed_quiesc = 1; 108 rdp->passed_quiesc = 1;
109 } 109 }
110 110
111 void rcu_bh_qs(int cpu) 111 void rcu_bh_qs(int cpu)
112 { 112 {
113 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 113 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
114 114
115 rdp->passed_quiesc_completed = rdp->gpnum - 1; 115 rdp->passed_quiesc_completed = rdp->gpnum - 1;
116 barrier(); 116 barrier();
117 rdp->passed_quiesc = 1; 117 rdp->passed_quiesc = 1;
118 } 118 }
119 119
120 /* 120 /*
121 * Note a context switch. This is a quiescent state for RCU-sched, 121 * Note a context switch. This is a quiescent state for RCU-sched,
122 * and requires special handling for preemptible RCU. 122 * and requires special handling for preemptible RCU.
123 */ 123 */
124 void rcu_note_context_switch(int cpu) 124 void rcu_note_context_switch(int cpu)
125 { 125 {
126 rcu_sched_qs(cpu); 126 rcu_sched_qs(cpu);
127 rcu_preempt_note_context_switch(cpu); 127 rcu_preempt_note_context_switch(cpu);
128 } 128 }
129 129
130 #ifdef CONFIG_NO_HZ 130 #ifdef CONFIG_NO_HZ
131 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 131 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
132 .dynticks_nesting = 1, 132 .dynticks_nesting = 1,
133 .dynticks = 1, 133 .dynticks = 1,
134 }; 134 };
135 #endif /* #ifdef CONFIG_NO_HZ */ 135 #endif /* #ifdef CONFIG_NO_HZ */
136 136
137 static int blimit = 10; /* Maximum callbacks per softirq. */ 137 static int blimit = 10; /* Maximum callbacks per softirq. */
138 static int qhimark = 10000; /* If this many pending, ignore blimit. */ 138 static int qhimark = 10000; /* If this many pending, ignore blimit. */
139 static int qlowmark = 100; /* Once only this many pending, use blimit. */ 139 static int qlowmark = 100; /* Once only this many pending, use blimit. */
140 140
141 module_param(blimit, int, 0); 141 module_param(blimit, int, 0);
142 module_param(qhimark, int, 0); 142 module_param(qhimark, int, 0);
143 module_param(qlowmark, int, 0); 143 module_param(qlowmark, int, 0);
144 144
145 static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 145 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
146 static int rcu_pending(int cpu); 146 static int rcu_pending(int cpu);
147 147
148 /* 148 /*
149 * Return the number of RCU-sched batches processed thus far for debug & stats. 149 * Return the number of RCU-sched batches processed thus far for debug & stats.
150 */ 150 */
151 long rcu_batches_completed_sched(void) 151 long rcu_batches_completed_sched(void)
152 { 152 {
153 return rcu_sched_state.completed; 153 return rcu_sched_state.completed;
154 } 154 }
155 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 155 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
156 156
157 /* 157 /*
158 * Return the number of RCU BH batches processed thus far for debug & stats. 158 * Return the number of RCU BH batches processed thus far for debug & stats.
159 */ 159 */
160 long rcu_batches_completed_bh(void) 160 long rcu_batches_completed_bh(void)
161 { 161 {
162 return rcu_bh_state.completed; 162 return rcu_bh_state.completed;
163 } 163 }
164 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 164 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
165 165
166 /* 166 /*
167 * Force a quiescent state for RCU BH. 167 * Force a quiescent state for RCU BH.
168 */ 168 */
169 void rcu_bh_force_quiescent_state(void) 169 void rcu_bh_force_quiescent_state(void)
170 { 170 {
171 force_quiescent_state(&rcu_bh_state, 0); 171 force_quiescent_state(&rcu_bh_state, 0);
172 } 172 }
173 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 173 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
174 174
175 /* 175 /*
176 * Force a quiescent state for RCU-sched. 176 * Force a quiescent state for RCU-sched.
177 */ 177 */
178 void rcu_sched_force_quiescent_state(void) 178 void rcu_sched_force_quiescent_state(void)
179 { 179 {
180 force_quiescent_state(&rcu_sched_state, 0); 180 force_quiescent_state(&rcu_sched_state, 0);
181 } 181 }
182 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); 182 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
183 183
184 /* 184 /*
185 * Does the CPU have callbacks ready to be invoked? 185 * Does the CPU have callbacks ready to be invoked?
186 */ 186 */
187 static int 187 static int
188 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) 188 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
189 { 189 {
190 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; 190 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
191 } 191 }
192 192
193 /* 193 /*
194 * Does the current CPU require a yet-as-unscheduled grace period? 194 * Does the current CPU require a yet-as-unscheduled grace period?
195 */ 195 */
196 static int 196 static int
197 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 197 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
198 { 198 {
199 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); 199 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp);
200 } 200 }
201 201
202 /* 202 /*
203 * Return the root node of the specified rcu_state structure. 203 * Return the root node of the specified rcu_state structure.
204 */ 204 */
205 static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 205 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
206 { 206 {
207 return &rsp->node[0]; 207 return &rsp->node[0];
208 } 208 }
209 209
210 #ifdef CONFIG_SMP 210 #ifdef CONFIG_SMP
211 211
212 /* 212 /*
213 * If the specified CPU is offline, tell the caller that it is in 213 * If the specified CPU is offline, tell the caller that it is in
214 * a quiescent state. Otherwise, whack it with a reschedule IPI. 214 * a quiescent state. Otherwise, whack it with a reschedule IPI.
215 * Grace periods can end up waiting on an offline CPU when that 215 * Grace periods can end up waiting on an offline CPU when that
216 * CPU is in the process of coming online -- it will be added to the 216 * CPU is in the process of coming online -- it will be added to the
217 * rcu_node bitmasks before it actually makes it online. The same thing 217 * rcu_node bitmasks before it actually makes it online. The same thing
218 * can happen while a CPU is in the process of coming online. Because this 218 * can happen while a CPU is in the process of coming online. Because this
219 * race is quite rare, we check for it after detecting that the grace 219 * race is quite rare, we check for it after detecting that the grace
220 * period has been delayed rather than checking each and every CPU 220 * period has been delayed rather than checking each and every CPU
221 * each and every time we start a new grace period. 221 * each and every time we start a new grace period.
222 */ 222 */
223 static int rcu_implicit_offline_qs(struct rcu_data *rdp) 223 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
224 { 224 {
225 /* 225 /*
226 * If the CPU is offline, it is in a quiescent state. We can 226 * If the CPU is offline, it is in a quiescent state. We can
227 * trust its state not to change because interrupts are disabled. 227 * trust its state not to change because interrupts are disabled.
228 */ 228 */
229 if (cpu_is_offline(rdp->cpu)) { 229 if (cpu_is_offline(rdp->cpu)) {
230 rdp->offline_fqs++; 230 rdp->offline_fqs++;
231 return 1; 231 return 1;
232 } 232 }
233 233
234 /* If preemptable RCU, no point in sending reschedule IPI. */ 234 /* If preemptable RCU, no point in sending reschedule IPI. */
235 if (rdp->preemptable) 235 if (rdp->preemptable)
236 return 0; 236 return 0;
237 237
238 /* The CPU is online, so send it a reschedule IPI. */ 238 /* The CPU is online, so send it a reschedule IPI. */
239 if (rdp->cpu != smp_processor_id()) 239 if (rdp->cpu != smp_processor_id())
240 smp_send_reschedule(rdp->cpu); 240 smp_send_reschedule(rdp->cpu);
241 else 241 else
242 set_need_resched(); 242 set_need_resched();
243 rdp->resched_ipi++; 243 rdp->resched_ipi++;
244 return 0; 244 return 0;
245 } 245 }
246 246
247 #endif /* #ifdef CONFIG_SMP */ 247 #endif /* #ifdef CONFIG_SMP */
248 248
249 #ifdef CONFIG_NO_HZ 249 #ifdef CONFIG_NO_HZ
250 250
251 /** 251 /**
252 * rcu_enter_nohz - inform RCU that current CPU is entering nohz 252 * rcu_enter_nohz - inform RCU that current CPU is entering nohz
253 * 253 *
254 * Enter nohz mode, in other words, -leave- the mode in which RCU 254 * Enter nohz mode, in other words, -leave- the mode in which RCU
255 * read-side critical sections can occur. (Though RCU read-side 255 * read-side critical sections can occur. (Though RCU read-side
256 * critical sections can occur in irq handlers in nohz mode, a possibility 256 * critical sections can occur in irq handlers in nohz mode, a possibility
257 * handled by rcu_irq_enter() and rcu_irq_exit()). 257 * handled by rcu_irq_enter() and rcu_irq_exit()).
258 */ 258 */
259 void rcu_enter_nohz(void) 259 void rcu_enter_nohz(void)
260 { 260 {
261 unsigned long flags; 261 unsigned long flags;
262 struct rcu_dynticks *rdtp; 262 struct rcu_dynticks *rdtp;
263 263
264 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 264 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
265 local_irq_save(flags); 265 local_irq_save(flags);
266 rdtp = &__get_cpu_var(rcu_dynticks); 266 rdtp = &__get_cpu_var(rcu_dynticks);
267 rdtp->dynticks++; 267 rdtp->dynticks++;
268 rdtp->dynticks_nesting--; 268 rdtp->dynticks_nesting--;
269 WARN_ON_ONCE(rdtp->dynticks & 0x1); 269 WARN_ON_ONCE(rdtp->dynticks & 0x1);
270 local_irq_restore(flags); 270 local_irq_restore(flags);
271 } 271 }
272 272
273 /* 273 /*
274 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz 274 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
275 * 275 *
276 * Exit nohz mode, in other words, -enter- the mode in which RCU 276 * Exit nohz mode, in other words, -enter- the mode in which RCU
277 * read-side critical sections normally occur. 277 * read-side critical sections normally occur.
278 */ 278 */
279 void rcu_exit_nohz(void) 279 void rcu_exit_nohz(void)
280 { 280 {
281 unsigned long flags; 281 unsigned long flags;
282 struct rcu_dynticks *rdtp; 282 struct rcu_dynticks *rdtp;
283 283
284 local_irq_save(flags); 284 local_irq_save(flags);
285 rdtp = &__get_cpu_var(rcu_dynticks); 285 rdtp = &__get_cpu_var(rcu_dynticks);
286 rdtp->dynticks++; 286 rdtp->dynticks++;
287 rdtp->dynticks_nesting++; 287 rdtp->dynticks_nesting++;
288 WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); 288 WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
289 local_irq_restore(flags); 289 local_irq_restore(flags);
290 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 290 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
291 } 291 }
292 292
293 /** 293 /**
294 * rcu_nmi_enter - inform RCU of entry to NMI context 294 * rcu_nmi_enter - inform RCU of entry to NMI context
295 * 295 *
296 * If the CPU was idle with dynamic ticks active, and there is no 296 * If the CPU was idle with dynamic ticks active, and there is no
297 * irq handler running, this updates rdtp->dynticks_nmi to let the 297 * irq handler running, this updates rdtp->dynticks_nmi to let the
298 * RCU grace-period handling know that the CPU is active. 298 * RCU grace-period handling know that the CPU is active.
299 */ 299 */
300 void rcu_nmi_enter(void) 300 void rcu_nmi_enter(void)
301 { 301 {
302 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 302 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
303 303
304 if (rdtp->dynticks & 0x1) 304 if (rdtp->dynticks & 0x1)
305 return; 305 return;
306 rdtp->dynticks_nmi++; 306 rdtp->dynticks_nmi++;
307 WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); 307 WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1));
308 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 308 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
309 } 309 }
310 310
311 /** 311 /**
312 * rcu_nmi_exit - inform RCU of exit from NMI context 312 * rcu_nmi_exit - inform RCU of exit from NMI context
313 * 313 *
314 * If the CPU was idle with dynamic ticks active, and there is no 314 * If the CPU was idle with dynamic ticks active, and there is no
315 * irq handler running, this updates rdtp->dynticks_nmi to let the 315 * irq handler running, this updates rdtp->dynticks_nmi to let the
316 * RCU grace-period handling know that the CPU is no longer active. 316 * RCU grace-period handling know that the CPU is no longer active.
317 */ 317 */
318 void rcu_nmi_exit(void) 318 void rcu_nmi_exit(void)
319 { 319 {
320 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 320 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
321 321
322 if (rdtp->dynticks & 0x1) 322 if (rdtp->dynticks & 0x1)
323 return; 323 return;
324 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 324 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
325 rdtp->dynticks_nmi++; 325 rdtp->dynticks_nmi++;
326 WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); 326 WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1);
327 } 327 }
328 328
329 /** 329 /**
330 * rcu_irq_enter - inform RCU of entry to hard irq context 330 * rcu_irq_enter - inform RCU of entry to hard irq context
331 * 331 *
332 * If the CPU was idle with dynamic ticks active, this updates the 332 * If the CPU was idle with dynamic ticks active, this updates the
333 * rdtp->dynticks to let the RCU handling know that the CPU is active. 333 * rdtp->dynticks to let the RCU handling know that the CPU is active.
334 */ 334 */
335 void rcu_irq_enter(void) 335 void rcu_irq_enter(void)
336 { 336 {
337 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 337 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
338 338
339 if (rdtp->dynticks_nesting++) 339 if (rdtp->dynticks_nesting++)
340 return; 340 return;
341 rdtp->dynticks++; 341 rdtp->dynticks++;
342 WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); 342 WARN_ON_ONCE(!(rdtp->dynticks & 0x1));
343 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 343 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
344 } 344 }
345 345
346 /** 346 /**
347 * rcu_irq_exit - inform RCU of exit from hard irq context 347 * rcu_irq_exit - inform RCU of exit from hard irq context
348 * 348 *
349 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks 349 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
350 * to put let the RCU handling be aware that the CPU is going back to idle 350 * to put let the RCU handling be aware that the CPU is going back to idle
351 * with no ticks. 351 * with no ticks.
352 */ 352 */
353 void rcu_irq_exit(void) 353 void rcu_irq_exit(void)
354 { 354 {
355 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 355 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
356 356
357 if (--rdtp->dynticks_nesting) 357 if (--rdtp->dynticks_nesting)
358 return; 358 return;
359 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 359 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
360 rdtp->dynticks++; 360 rdtp->dynticks++;
361 WARN_ON_ONCE(rdtp->dynticks & 0x1); 361 WARN_ON_ONCE(rdtp->dynticks & 0x1);
362 362
363 /* If the interrupt queued a callback, get out of dyntick mode. */ 363 /* If the interrupt queued a callback, get out of dyntick mode. */
364 if (__get_cpu_var(rcu_sched_data).nxtlist || 364 if (__get_cpu_var(rcu_sched_data).nxtlist ||
365 __get_cpu_var(rcu_bh_data).nxtlist) 365 __get_cpu_var(rcu_bh_data).nxtlist)
366 set_need_resched(); 366 set_need_resched();
367 } 367 }
368 368
369 #ifdef CONFIG_SMP 369 #ifdef CONFIG_SMP
370 370
371 /* 371 /*
372 * Snapshot the specified CPU's dynticks counter so that we can later 372 * Snapshot the specified CPU's dynticks counter so that we can later
373 * credit them with an implicit quiescent state. Return 1 if this CPU 373 * credit them with an implicit quiescent state. Return 1 if this CPU
374 * is in dynticks idle mode, which is an extended quiescent state. 374 * is in dynticks idle mode, which is an extended quiescent state.
375 */ 375 */
376 static int dyntick_save_progress_counter(struct rcu_data *rdp) 376 static int dyntick_save_progress_counter(struct rcu_data *rdp)
377 { 377 {
378 int ret; 378 int ret;
379 int snap; 379 int snap;
380 int snap_nmi; 380 int snap_nmi;
381 381
382 snap = rdp->dynticks->dynticks; 382 snap = rdp->dynticks->dynticks;
383 snap_nmi = rdp->dynticks->dynticks_nmi; 383 snap_nmi = rdp->dynticks->dynticks_nmi;
384 smp_mb(); /* Order sampling of snap with end of grace period. */ 384 smp_mb(); /* Order sampling of snap with end of grace period. */
385 rdp->dynticks_snap = snap; 385 rdp->dynticks_snap = snap;
386 rdp->dynticks_nmi_snap = snap_nmi; 386 rdp->dynticks_nmi_snap = snap_nmi;
387 ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); 387 ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
388 if (ret) 388 if (ret)
389 rdp->dynticks_fqs++; 389 rdp->dynticks_fqs++;
390 return ret; 390 return ret;
391 } 391 }
392 392
393 /* 393 /*
394 * Return true if the specified CPU has passed through a quiescent 394 * Return true if the specified CPU has passed through a quiescent
395 * state by virtue of being in or having passed through an dynticks 395 * state by virtue of being in or having passed through an dynticks
396 * idle state since the last call to dyntick_save_progress_counter() 396 * idle state since the last call to dyntick_save_progress_counter()
397 * for this same CPU. 397 * for this same CPU.
398 */ 398 */
399 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 399 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
400 { 400 {
401 long curr; 401 long curr;
402 long curr_nmi; 402 long curr_nmi;
403 long snap; 403 long snap;
404 long snap_nmi; 404 long snap_nmi;
405 405
406 curr = rdp->dynticks->dynticks; 406 curr = rdp->dynticks->dynticks;
407 snap = rdp->dynticks_snap; 407 snap = rdp->dynticks_snap;
408 curr_nmi = rdp->dynticks->dynticks_nmi; 408 curr_nmi = rdp->dynticks->dynticks_nmi;
409 snap_nmi = rdp->dynticks_nmi_snap; 409 snap_nmi = rdp->dynticks_nmi_snap;
410 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 410 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
411 411
412 /* 412 /*
413 * If the CPU passed through or entered a dynticks idle phase with 413 * If the CPU passed through or entered a dynticks idle phase with
414 * no active irq/NMI handlers, then we can safely pretend that the CPU 414 * no active irq/NMI handlers, then we can safely pretend that the CPU
415 * already acknowledged the request to pass through a quiescent 415 * already acknowledged the request to pass through a quiescent
416 * state. Either way, that CPU cannot possibly be in an RCU 416 * state. Either way, that CPU cannot possibly be in an RCU
417 * read-side critical section that started before the beginning 417 * read-side critical section that started before the beginning
418 * of the current RCU grace period. 418 * of the current RCU grace period.
419 */ 419 */
420 if ((curr != snap || (curr & 0x1) == 0) && 420 if ((curr != snap || (curr & 0x1) == 0) &&
421 (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { 421 (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
422 rdp->dynticks_fqs++; 422 rdp->dynticks_fqs++;
423 return 1; 423 return 1;
424 } 424 }
425 425
426 /* Go check for the CPU being offline. */ 426 /* Go check for the CPU being offline. */
427 return rcu_implicit_offline_qs(rdp); 427 return rcu_implicit_offline_qs(rdp);
428 } 428 }
429 429
430 #endif /* #ifdef CONFIG_SMP */ 430 #endif /* #ifdef CONFIG_SMP */
431 431
432 #else /* #ifdef CONFIG_NO_HZ */ 432 #else /* #ifdef CONFIG_NO_HZ */
433 433
434 #ifdef CONFIG_SMP 434 #ifdef CONFIG_SMP
435 435
436 static int dyntick_save_progress_counter(struct rcu_data *rdp) 436 static int dyntick_save_progress_counter(struct rcu_data *rdp)
437 { 437 {
438 return 0; 438 return 0;
439 } 439 }
440 440
441 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 441 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
442 { 442 {
443 return rcu_implicit_offline_qs(rdp); 443 return rcu_implicit_offline_qs(rdp);
444 } 444 }
445 445
446 #endif /* #ifdef CONFIG_SMP */ 446 #endif /* #ifdef CONFIG_SMP */
447 447
448 #endif /* #else #ifdef CONFIG_NO_HZ */ 448 #endif /* #else #ifdef CONFIG_NO_HZ */
449 449
450 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 450 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
451 451
452 int rcu_cpu_stall_panicking __read_mostly; 452 int rcu_cpu_stall_panicking __read_mostly;
453 453
454 static void record_gp_stall_check_time(struct rcu_state *rsp) 454 static void record_gp_stall_check_time(struct rcu_state *rsp)
455 { 455 {
456 rsp->gp_start = jiffies; 456 rsp->gp_start = jiffies;
457 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; 457 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
458 } 458 }
459 459
460 static void print_other_cpu_stall(struct rcu_state *rsp) 460 static void print_other_cpu_stall(struct rcu_state *rsp)
461 { 461 {
462 int cpu; 462 int cpu;
463 long delta; 463 long delta;
464 unsigned long flags; 464 unsigned long flags;
465 struct rcu_node *rnp = rcu_get_root(rsp); 465 struct rcu_node *rnp = rcu_get_root(rsp);
466 466
467 /* Only let one CPU complain about others per time interval. */ 467 /* Only let one CPU complain about others per time interval. */
468 468
469 raw_spin_lock_irqsave(&rnp->lock, flags); 469 raw_spin_lock_irqsave(&rnp->lock, flags);
470 delta = jiffies - rsp->jiffies_stall; 470 delta = jiffies - rsp->jiffies_stall;
471 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 471 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
472 raw_spin_unlock_irqrestore(&rnp->lock, flags); 472 raw_spin_unlock_irqrestore(&rnp->lock, flags);
473 return; 473 return;
474 } 474 }
475 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 475 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
476 476
477 /* 477 /*
478 * Now rat on any tasks that got kicked up to the root rcu_node 478 * Now rat on any tasks that got kicked up to the root rcu_node
479 * due to CPU offlining. 479 * due to CPU offlining.
480 */ 480 */
481 rcu_print_task_stall(rnp); 481 rcu_print_task_stall(rnp);
482 raw_spin_unlock_irqrestore(&rnp->lock, flags); 482 raw_spin_unlock_irqrestore(&rnp->lock, flags);
483 483
484 /* OK, time to rat on our buddy... */ 484 /* OK, time to rat on our buddy... */
485 485
486 printk(KERN_ERR "INFO: RCU detected CPU stalls:"); 486 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
487 rcu_for_each_leaf_node(rsp, rnp) { 487 rcu_for_each_leaf_node(rsp, rnp) {
488 raw_spin_lock_irqsave(&rnp->lock, flags); 488 raw_spin_lock_irqsave(&rnp->lock, flags);
489 rcu_print_task_stall(rnp); 489 rcu_print_task_stall(rnp);
490 raw_spin_unlock_irqrestore(&rnp->lock, flags); 490 raw_spin_unlock_irqrestore(&rnp->lock, flags);
491 if (rnp->qsmask == 0) 491 if (rnp->qsmask == 0)
492 continue; 492 continue;
493 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 493 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
494 if (rnp->qsmask & (1UL << cpu)) 494 if (rnp->qsmask & (1UL << cpu))
495 printk(" %d", rnp->grplo + cpu); 495 printk(" %d", rnp->grplo + cpu);
496 } 496 }
497 printk(" (detected by %d, t=%ld jiffies)\n", 497 printk(" (detected by %d, t=%ld jiffies)\n",
498 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 498 smp_processor_id(), (long)(jiffies - rsp->gp_start));
499 trigger_all_cpu_backtrace(); 499 trigger_all_cpu_backtrace();
500 500
501 /* If so configured, complain about tasks blocking the grace period. */ 501 /* If so configured, complain about tasks blocking the grace period. */
502 502
503 rcu_print_detail_task_stall(rsp); 503 rcu_print_detail_task_stall(rsp);
504 504
505 force_quiescent_state(rsp, 0); /* Kick them all. */ 505 force_quiescent_state(rsp, 0); /* Kick them all. */
506 } 506 }
507 507
508 static void print_cpu_stall(struct rcu_state *rsp) 508 static void print_cpu_stall(struct rcu_state *rsp)
509 { 509 {
510 unsigned long flags; 510 unsigned long flags;
511 struct rcu_node *rnp = rcu_get_root(rsp); 511 struct rcu_node *rnp = rcu_get_root(rsp);
512 512
513 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", 513 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
514 smp_processor_id(), jiffies - rsp->gp_start); 514 smp_processor_id(), jiffies - rsp->gp_start);
515 trigger_all_cpu_backtrace(); 515 trigger_all_cpu_backtrace();
516 516
517 raw_spin_lock_irqsave(&rnp->lock, flags); 517 raw_spin_lock_irqsave(&rnp->lock, flags);
518 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 518 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
519 rsp->jiffies_stall = 519 rsp->jiffies_stall =
520 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 520 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
521 raw_spin_unlock_irqrestore(&rnp->lock, flags); 521 raw_spin_unlock_irqrestore(&rnp->lock, flags);
522 522
523 set_need_resched(); /* kick ourselves to get things going. */ 523 set_need_resched(); /* kick ourselves to get things going. */
524 } 524 }
525 525
526 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 526 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
527 { 527 {
528 long delta; 528 long delta;
529 struct rcu_node *rnp; 529 struct rcu_node *rnp;
530 530
531 if (rcu_cpu_stall_panicking) 531 if (rcu_cpu_stall_panicking)
532 return; 532 return;
533 delta = jiffies - rsp->jiffies_stall; 533 delta = jiffies - rsp->jiffies_stall;
534 rnp = rdp->mynode; 534 rnp = rdp->mynode;
535 if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { 535 if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
536 536
537 /* We haven't checked in, so go dump stack. */ 537 /* We haven't checked in, so go dump stack. */
538 print_cpu_stall(rsp); 538 print_cpu_stall(rsp);
539 539
540 } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) { 540 } else if (rcu_gp_in_progress(rsp) && delta >= RCU_STALL_RAT_DELAY) {
541 541
542 /* They had two time units to dump stack, so complain. */ 542 /* They had two time units to dump stack, so complain. */
543 print_other_cpu_stall(rsp); 543 print_other_cpu_stall(rsp);
544 } 544 }
545 } 545 }
546 546
547 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 547 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
548 { 548 {
549 rcu_cpu_stall_panicking = 1; 549 rcu_cpu_stall_panicking = 1;
550 return NOTIFY_DONE; 550 return NOTIFY_DONE;
551 } 551 }
552 552
553 static struct notifier_block rcu_panic_block = { 553 static struct notifier_block rcu_panic_block = {
554 .notifier_call = rcu_panic, 554 .notifier_call = rcu_panic,
555 }; 555 };
556 556
557 static void __init check_cpu_stall_init(void) 557 static void __init check_cpu_stall_init(void)
558 { 558 {
559 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 559 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
560 } 560 }
561 561
562 #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 562 #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
563 563
564 static void record_gp_stall_check_time(struct rcu_state *rsp) 564 static void record_gp_stall_check_time(struct rcu_state *rsp)
565 { 565 {
566 } 566 }
567 567
568 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 568 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
569 { 569 {
570 } 570 }
571 571
572 static void __init check_cpu_stall_init(void) 572 static void __init check_cpu_stall_init(void)
573 { 573 {
574 } 574 }
575 575
576 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 576 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
577 577
578 /* 578 /*
579 * Update CPU-local rcu_data state to record the newly noticed grace period. 579 * Update CPU-local rcu_data state to record the newly noticed grace period.
580 * This is used both when we started the grace period and when we notice 580 * This is used both when we started the grace period and when we notice
581 * that someone else started the grace period. The caller must hold the 581 * that someone else started the grace period. The caller must hold the
582 * ->lock of the leaf rcu_node structure corresponding to the current CPU, 582 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
583 * and must have irqs disabled. 583 * and must have irqs disabled.
584 */ 584 */
585 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 585 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
586 { 586 {
587 if (rdp->gpnum != rnp->gpnum) { 587 if (rdp->gpnum != rnp->gpnum) {
588 rdp->qs_pending = 1; 588 rdp->qs_pending = 1;
589 rdp->passed_quiesc = 0; 589 rdp->passed_quiesc = 0;
590 rdp->gpnum = rnp->gpnum; 590 rdp->gpnum = rnp->gpnum;
591 } 591 }
592 } 592 }
593 593
594 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) 594 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
595 { 595 {
596 unsigned long flags; 596 unsigned long flags;
597 struct rcu_node *rnp; 597 struct rcu_node *rnp;
598 598
599 local_irq_save(flags); 599 local_irq_save(flags);
600 rnp = rdp->mynode; 600 rnp = rdp->mynode;
601 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ 601 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
602 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 602 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
603 local_irq_restore(flags); 603 local_irq_restore(flags);
604 return; 604 return;
605 } 605 }
606 __note_new_gpnum(rsp, rnp, rdp); 606 __note_new_gpnum(rsp, rnp, rdp);
607 raw_spin_unlock_irqrestore(&rnp->lock, flags); 607 raw_spin_unlock_irqrestore(&rnp->lock, flags);
608 } 608 }
609 609
610 /* 610 /*
611 * Did someone else start a new RCU grace period start since we last 611 * Did someone else start a new RCU grace period start since we last
612 * checked? Update local state appropriately if so. Must be called 612 * checked? Update local state appropriately if so. Must be called
613 * on the CPU corresponding to rdp. 613 * on the CPU corresponding to rdp.
614 */ 614 */
615 static int 615 static int
616 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) 616 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
617 { 617 {
618 unsigned long flags; 618 unsigned long flags;
619 int ret = 0; 619 int ret = 0;
620 620
621 local_irq_save(flags); 621 local_irq_save(flags);
622 if (rdp->gpnum != rsp->gpnum) { 622 if (rdp->gpnum != rsp->gpnum) {
623 note_new_gpnum(rsp, rdp); 623 note_new_gpnum(rsp, rdp);
624 ret = 1; 624 ret = 1;
625 } 625 }
626 local_irq_restore(flags); 626 local_irq_restore(flags);
627 return ret; 627 return ret;
628 } 628 }
629 629
630 /* 630 /*
631 * Advance this CPU's callbacks, but only if the current grace period 631 * Advance this CPU's callbacks, but only if the current grace period
632 * has ended. This may be called only from the CPU to whom the rdp 632 * has ended. This may be called only from the CPU to whom the rdp
633 * belongs. In addition, the corresponding leaf rcu_node structure's 633 * belongs. In addition, the corresponding leaf rcu_node structure's
634 * ->lock must be held by the caller, with irqs disabled. 634 * ->lock must be held by the caller, with irqs disabled.
635 */ 635 */
636 static void 636 static void
637 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 637 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
638 { 638 {
639 /* Did another grace period end? */ 639 /* Did another grace period end? */
640 if (rdp->completed != rnp->completed) { 640 if (rdp->completed != rnp->completed) {
641 641
642 /* Advance callbacks. No harm if list empty. */ 642 /* Advance callbacks. No harm if list empty. */
643 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 643 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
644 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 644 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
645 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 645 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
646 646
647 /* Remember that we saw this grace-period completion. */ 647 /* Remember that we saw this grace-period completion. */
648 rdp->completed = rnp->completed; 648 rdp->completed = rnp->completed;
649 } 649 }
650 } 650 }
651 651
652 /* 652 /*
653 * Advance this CPU's callbacks, but only if the current grace period 653 * Advance this CPU's callbacks, but only if the current grace period
654 * has ended. This may be called only from the CPU to whom the rdp 654 * has ended. This may be called only from the CPU to whom the rdp
655 * belongs. 655 * belongs.
656 */ 656 */
657 static void 657 static void
658 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) 658 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
659 { 659 {
660 unsigned long flags; 660 unsigned long flags;
661 struct rcu_node *rnp; 661 struct rcu_node *rnp;
662 662
663 local_irq_save(flags); 663 local_irq_save(flags);
664 rnp = rdp->mynode; 664 rnp = rdp->mynode;
665 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ 665 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
666 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 666 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
667 local_irq_restore(flags); 667 local_irq_restore(flags);
668 return; 668 return;
669 } 669 }
670 __rcu_process_gp_end(rsp, rnp, rdp); 670 __rcu_process_gp_end(rsp, rnp, rdp);
671 raw_spin_unlock_irqrestore(&rnp->lock, flags); 671 raw_spin_unlock_irqrestore(&rnp->lock, flags);
672 } 672 }
673 673
674 /* 674 /*
675 * Do per-CPU grace-period initialization for running CPU. The caller 675 * Do per-CPU grace-period initialization for running CPU. The caller
676 * must hold the lock of the leaf rcu_node structure corresponding to 676 * must hold the lock of the leaf rcu_node structure corresponding to
677 * this CPU. 677 * this CPU.
678 */ 678 */
679 static void 679 static void
680 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 680 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
681 { 681 {
682 /* Prior grace period ended, so advance callbacks for current CPU. */ 682 /* Prior grace period ended, so advance callbacks for current CPU. */
683 __rcu_process_gp_end(rsp, rnp, rdp); 683 __rcu_process_gp_end(rsp, rnp, rdp);
684 684
685 /* 685 /*
686 * Because this CPU just now started the new grace period, we know 686 * Because this CPU just now started the new grace period, we know
687 * that all of its callbacks will be covered by this upcoming grace 687 * that all of its callbacks will be covered by this upcoming grace
688 * period, even the ones that were registered arbitrarily recently. 688 * period, even the ones that were registered arbitrarily recently.
689 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. 689 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
690 * 690 *
691 * Other CPUs cannot be sure exactly when the grace period started. 691 * Other CPUs cannot be sure exactly when the grace period started.
692 * Therefore, their recently registered callbacks must pass through 692 * Therefore, their recently registered callbacks must pass through
693 * an additional RCU_NEXT_READY stage, so that they will be handled 693 * an additional RCU_NEXT_READY stage, so that they will be handled
694 * by the next RCU grace period. 694 * by the next RCU grace period.
695 */ 695 */
696 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 696 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
697 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 697 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
698 698
699 /* Set state so that this CPU will detect the next quiescent state. */ 699 /* Set state so that this CPU will detect the next quiescent state. */
700 __note_new_gpnum(rsp, rnp, rdp); 700 __note_new_gpnum(rsp, rnp, rdp);
701 } 701 }
702 702
703 /* 703 /*
704 * Start a new RCU grace period if warranted, re-initializing the hierarchy 704 * Start a new RCU grace period if warranted, re-initializing the hierarchy
705 * in preparation for detecting the next grace period. The caller must hold 705 * in preparation for detecting the next grace period. The caller must hold
706 * the root node's ->lock, which is released before return. Hard irqs must 706 * the root node's ->lock, which is released before return. Hard irqs must
707 * be disabled. 707 * be disabled.
708 */ 708 */
709 static void 709 static void
710 rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 710 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
711 __releases(rcu_get_root(rsp)->lock) 711 __releases(rcu_get_root(rsp)->lock)
712 { 712 {
713 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 713 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
714 struct rcu_node *rnp = rcu_get_root(rsp); 714 struct rcu_node *rnp = rcu_get_root(rsp);
715 715
716 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { 716 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
717 if (cpu_needs_another_gp(rsp, rdp)) 717 if (cpu_needs_another_gp(rsp, rdp))
718 rsp->fqs_need_gp = 1; 718 rsp->fqs_need_gp = 1;
719 if (rnp->completed == rsp->completed) { 719 if (rnp->completed == rsp->completed) {
720 raw_spin_unlock_irqrestore(&rnp->lock, flags); 720 raw_spin_unlock_irqrestore(&rnp->lock, flags);
721 return; 721 return;
722 } 722 }
723 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 723 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
724 724
725 /* 725 /*
726 * Propagate new ->completed value to rcu_node structures 726 * Propagate new ->completed value to rcu_node structures
727 * so that other CPUs don't have to wait until the start 727 * so that other CPUs don't have to wait until the start
728 * of the next grace period to process their callbacks. 728 * of the next grace period to process their callbacks.
729 */ 729 */
730 rcu_for_each_node_breadth_first(rsp, rnp) { 730 rcu_for_each_node_breadth_first(rsp, rnp) {
731 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 731 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
732 rnp->completed = rsp->completed; 732 rnp->completed = rsp->completed;
733 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 733 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
734 } 734 }
735 local_irq_restore(flags); 735 local_irq_restore(flags);
736 return; 736 return;
737 } 737 }
738 738
739 /* Advance to a new grace period and initialize state. */ 739 /* Advance to a new grace period and initialize state. */
740 rsp->gpnum++; 740 rsp->gpnum++;
741 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 741 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
742 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 742 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
743 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 743 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
744 record_gp_stall_check_time(rsp); 744 record_gp_stall_check_time(rsp);
745 745
746 /* Special-case the common single-level case. */ 746 /* Special-case the common single-level case. */
747 if (NUM_RCU_NODES == 1) { 747 if (NUM_RCU_NODES == 1) {
748 rcu_preempt_check_blocked_tasks(rnp); 748 rcu_preempt_check_blocked_tasks(rnp);
749 rnp->qsmask = rnp->qsmaskinit; 749 rnp->qsmask = rnp->qsmaskinit;
750 rnp->gpnum = rsp->gpnum; 750 rnp->gpnum = rsp->gpnum;
751 rnp->completed = rsp->completed; 751 rnp->completed = rsp->completed;
752 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 752 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
753 rcu_start_gp_per_cpu(rsp, rnp, rdp); 753 rcu_start_gp_per_cpu(rsp, rnp, rdp);
754 raw_spin_unlock_irqrestore(&rnp->lock, flags); 754 raw_spin_unlock_irqrestore(&rnp->lock, flags);
755 return; 755 return;
756 } 756 }
757 757
758 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ 758 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
759 759
760 760
761 /* Exclude any concurrent CPU-hotplug operations. */ 761 /* Exclude any concurrent CPU-hotplug operations. */
762 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 762 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
763 763
764 /* 764 /*
765 * Set the quiescent-state-needed bits in all the rcu_node 765 * Set the quiescent-state-needed bits in all the rcu_node
766 * structures for all currently online CPUs in breadth-first 766 * structures for all currently online CPUs in breadth-first
767 * order, starting from the root rcu_node structure. This 767 * order, starting from the root rcu_node structure. This
768 * operation relies on the layout of the hierarchy within the 768 * operation relies on the layout of the hierarchy within the
769 * rsp->node[] array. Note that other CPUs will access only 769 * rsp->node[] array. Note that other CPUs will access only
770 * the leaves of the hierarchy, which still indicate that no 770 * the leaves of the hierarchy, which still indicate that no
771 * grace period is in progress, at least until the corresponding 771 * grace period is in progress, at least until the corresponding
772 * leaf node has been initialized. In addition, we have excluded 772 * leaf node has been initialized. In addition, we have excluded
773 * CPU-hotplug operations. 773 * CPU-hotplug operations.
774 * 774 *
775 * Note that the grace period cannot complete until we finish 775 * Note that the grace period cannot complete until we finish
776 * the initialization process, as there will be at least one 776 * the initialization process, as there will be at least one
777 * qsmask bit set in the root node until that time, namely the 777 * qsmask bit set in the root node until that time, namely the
778 * one corresponding to this CPU, due to the fact that we have 778 * one corresponding to this CPU, due to the fact that we have
779 * irqs disabled. 779 * irqs disabled.
780 */ 780 */
781 rcu_for_each_node_breadth_first(rsp, rnp) { 781 rcu_for_each_node_breadth_first(rsp, rnp) {
782 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 782 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
783 rcu_preempt_check_blocked_tasks(rnp); 783 rcu_preempt_check_blocked_tasks(rnp);
784 rnp->qsmask = rnp->qsmaskinit; 784 rnp->qsmask = rnp->qsmaskinit;
785 rnp->gpnum = rsp->gpnum; 785 rnp->gpnum = rsp->gpnum;
786 rnp->completed = rsp->completed; 786 rnp->completed = rsp->completed;
787 if (rnp == rdp->mynode) 787 if (rnp == rdp->mynode)
788 rcu_start_gp_per_cpu(rsp, rnp, rdp); 788 rcu_start_gp_per_cpu(rsp, rnp, rdp);
789 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 789 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
790 } 790 }
791 791
792 rnp = rcu_get_root(rsp); 792 rnp = rcu_get_root(rsp);
793 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 793 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
794 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ 794 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
795 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 795 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
796 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 796 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
797 } 797 }
798 798
799 /* 799 /*
800 * Report a full set of quiescent states to the specified rcu_state 800 * Report a full set of quiescent states to the specified rcu_state
801 * data structure. This involves cleaning up after the prior grace 801 * data structure. This involves cleaning up after the prior grace
802 * period and letting rcu_start_gp() start up the next grace period 802 * period and letting rcu_start_gp() start up the next grace period
803 * if one is needed. Note that the caller must hold rnp->lock, as 803 * if one is needed. Note that the caller must hold rnp->lock, as
804 * required by rcu_start_gp(), which will release it. 804 * required by rcu_start_gp(), which will release it.
805 */ 805 */
806 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 806 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
807 __releases(rcu_get_root(rsp)->lock) 807 __releases(rcu_get_root(rsp)->lock)
808 { 808 {
809 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 809 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
810 rsp->completed = rsp->gpnum; 810 rsp->completed = rsp->gpnum;
811 rsp->signaled = RCU_GP_IDLE; 811 rsp->signaled = RCU_GP_IDLE;
812 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 812 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
813 } 813 }
814 814
815 /* 815 /*
816 * Similar to rcu_report_qs_rdp(), for which it is a helper function. 816 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
817 * Allows quiescent states for a group of CPUs to be reported at one go 817 * Allows quiescent states for a group of CPUs to be reported at one go
818 * to the specified rcu_node structure, though all the CPUs in the group 818 * to the specified rcu_node structure, though all the CPUs in the group
819 * must be represented by the same rcu_node structure (which need not be 819 * must be represented by the same rcu_node structure (which need not be
820 * a leaf rcu_node structure, though it often will be). That structure's 820 * a leaf rcu_node structure, though it often will be). That structure's
821 * lock must be held upon entry, and it is released before return. 821 * lock must be held upon entry, and it is released before return.
822 */ 822 */
823 static void 823 static void
824 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 824 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
825 struct rcu_node *rnp, unsigned long flags) 825 struct rcu_node *rnp, unsigned long flags)
826 __releases(rnp->lock) 826 __releases(rnp->lock)
827 { 827 {
828 struct rcu_node *rnp_c; 828 struct rcu_node *rnp_c;
829 829
830 /* Walk up the rcu_node hierarchy. */ 830 /* Walk up the rcu_node hierarchy. */
831 for (;;) { 831 for (;;) {
832 if (!(rnp->qsmask & mask)) { 832 if (!(rnp->qsmask & mask)) {
833 833
834 /* Our bit has already been cleared, so done. */ 834 /* Our bit has already been cleared, so done. */
835 raw_spin_unlock_irqrestore(&rnp->lock, flags); 835 raw_spin_unlock_irqrestore(&rnp->lock, flags);
836 return; 836 return;
837 } 837 }
838 rnp->qsmask &= ~mask; 838 rnp->qsmask &= ~mask;
839 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 839 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
840 840
841 /* Other bits still set at this level, so done. */ 841 /* Other bits still set at this level, so done. */
842 raw_spin_unlock_irqrestore(&rnp->lock, flags); 842 raw_spin_unlock_irqrestore(&rnp->lock, flags);
843 return; 843 return;
844 } 844 }
845 mask = rnp->grpmask; 845 mask = rnp->grpmask;
846 if (rnp->parent == NULL) { 846 if (rnp->parent == NULL) {
847 847
848 /* No more levels. Exit loop holding root lock. */ 848 /* No more levels. Exit loop holding root lock. */
849 849
850 break; 850 break;
851 } 851 }
852 raw_spin_unlock_irqrestore(&rnp->lock, flags); 852 raw_spin_unlock_irqrestore(&rnp->lock, flags);
853 rnp_c = rnp; 853 rnp_c = rnp;
854 rnp = rnp->parent; 854 rnp = rnp->parent;
855 raw_spin_lock_irqsave(&rnp->lock, flags); 855 raw_spin_lock_irqsave(&rnp->lock, flags);
856 WARN_ON_ONCE(rnp_c->qsmask); 856 WARN_ON_ONCE(rnp_c->qsmask);
857 } 857 }
858 858
859 /* 859 /*
860 * Get here if we are the last CPU to pass through a quiescent 860 * Get here if we are the last CPU to pass through a quiescent
861 * state for this grace period. Invoke rcu_report_qs_rsp() 861 * state for this grace period. Invoke rcu_report_qs_rsp()
862 * to clean up and start the next grace period if one is needed. 862 * to clean up and start the next grace period if one is needed.
863 */ 863 */
864 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 864 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
865 } 865 }
866 866
867 /* 867 /*
868 * Record a quiescent state for the specified CPU to that CPU's rcu_data 868 * Record a quiescent state for the specified CPU to that CPU's rcu_data
869 * structure. This must be either called from the specified CPU, or 869 * structure. This must be either called from the specified CPU, or
870 * called when the specified CPU is known to be offline (and when it is 870 * called when the specified CPU is known to be offline (and when it is
871 * also known that no other CPU is concurrently trying to help the offline 871 * also known that no other CPU is concurrently trying to help the offline
872 * CPU). The lastcomp argument is used to make sure we are still in the 872 * CPU). The lastcomp argument is used to make sure we are still in the
873 * grace period of interest. We don't want to end the current grace period 873 * grace period of interest. We don't want to end the current grace period
874 * based on quiescent states detected in an earlier grace period! 874 * based on quiescent states detected in an earlier grace period!
875 */ 875 */
876 static void 876 static void
877 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) 877 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
878 { 878 {
879 unsigned long flags; 879 unsigned long flags;
880 unsigned long mask; 880 unsigned long mask;
881 struct rcu_node *rnp; 881 struct rcu_node *rnp;
882 882
883 rnp = rdp->mynode; 883 rnp = rdp->mynode;
884 raw_spin_lock_irqsave(&rnp->lock, flags); 884 raw_spin_lock_irqsave(&rnp->lock, flags);
885 if (lastcomp != rnp->completed) { 885 if (lastcomp != rnp->completed) {
886 886
887 /* 887 /*
888 * Someone beat us to it for this grace period, so leave. 888 * Someone beat us to it for this grace period, so leave.
889 * The race with GP start is resolved by the fact that we 889 * The race with GP start is resolved by the fact that we
890 * hold the leaf rcu_node lock, so that the per-CPU bits 890 * hold the leaf rcu_node lock, so that the per-CPU bits
891 * cannot yet be initialized -- so we would simply find our 891 * cannot yet be initialized -- so we would simply find our
892 * CPU's bit already cleared in rcu_report_qs_rnp() if this 892 * CPU's bit already cleared in rcu_report_qs_rnp() if this
893 * race occurred. 893 * race occurred.
894 */ 894 */
895 rdp->passed_quiesc = 0; /* try again later! */ 895 rdp->passed_quiesc = 0; /* try again later! */
896 raw_spin_unlock_irqrestore(&rnp->lock, flags); 896 raw_spin_unlock_irqrestore(&rnp->lock, flags);
897 return; 897 return;
898 } 898 }
899 mask = rdp->grpmask; 899 mask = rdp->grpmask;
900 if ((rnp->qsmask & mask) == 0) { 900 if ((rnp->qsmask & mask) == 0) {
901 raw_spin_unlock_irqrestore(&rnp->lock, flags); 901 raw_spin_unlock_irqrestore(&rnp->lock, flags);
902 } else { 902 } else {
903 rdp->qs_pending = 0; 903 rdp->qs_pending = 0;
904 904
905 /* 905 /*
906 * This GP can't end until this CPU checks in, so all of our 906 * This GP can't end until this CPU checks in, so all of our
907 * callbacks can be processed during the next GP. 907 * callbacks can be processed during the next GP.
908 */ 908 */
909 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 909 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
910 910
911 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* releases rnp->lock */ 911 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* releases rnp->lock */
912 } 912 }
913 } 913 }
914 914
915 /* 915 /*
916 * Check to see if there is a new grace period of which this CPU 916 * Check to see if there is a new grace period of which this CPU
917 * is not yet aware, and if so, set up local rcu_data state for it. 917 * is not yet aware, and if so, set up local rcu_data state for it.
918 * Otherwise, see if this CPU has just passed through its first 918 * Otherwise, see if this CPU has just passed through its first
919 * quiescent state for this grace period, and record that fact if so. 919 * quiescent state for this grace period, and record that fact if so.
920 */ 920 */
921 static void 921 static void
922 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 922 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
923 { 923 {
924 /* If there is now a new grace period, record and return. */ 924 /* If there is now a new grace period, record and return. */
925 if (check_for_new_grace_period(rsp, rdp)) 925 if (check_for_new_grace_period(rsp, rdp))
926 return; 926 return;
927 927
928 /* 928 /*
929 * Does this CPU still need to do its part for current grace period? 929 * Does this CPU still need to do its part for current grace period?
930 * If no, return and let the other CPUs do their part as well. 930 * If no, return and let the other CPUs do their part as well.
931 */ 931 */
932 if (!rdp->qs_pending) 932 if (!rdp->qs_pending)
933 return; 933 return;
934 934
935 /* 935 /*
936 * Was there a quiescent state since the beginning of the grace 936 * Was there a quiescent state since the beginning of the grace
937 * period? If no, then exit and wait for the next call. 937 * period? If no, then exit and wait for the next call.
938 */ 938 */
939 if (!rdp->passed_quiesc) 939 if (!rdp->passed_quiesc)
940 return; 940 return;
941 941
942 /* 942 /*
943 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 943 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
944 * judge of that). 944 * judge of that).
945 */ 945 */
946 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); 946 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
947 } 947 }
948 948
949 #ifdef CONFIG_HOTPLUG_CPU 949 #ifdef CONFIG_HOTPLUG_CPU
950 950
951 /* 951 /*
952 * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the 952 * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
953 * specified flavor of RCU. The callbacks will be adopted by the next 953 * specified flavor of RCU. The callbacks will be adopted by the next
954 * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever 954 * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
955 * comes first. Because this is invoked from the CPU_DYING notifier, 955 * comes first. Because this is invoked from the CPU_DYING notifier,
956 * irqs are already disabled. 956 * irqs are already disabled.
957 */ 957 */
958 static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) 958 static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
959 { 959 {
960 int i; 960 int i;
961 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 961 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
962 962
963 if (rdp->nxtlist == NULL) 963 if (rdp->nxtlist == NULL)
964 return; /* irqs disabled, so comparison is stable. */ 964 return; /* irqs disabled, so comparison is stable. */
965 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 965 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
966 *rsp->orphan_cbs_tail = rdp->nxtlist; 966 *rsp->orphan_cbs_tail = rdp->nxtlist;
967 rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; 967 rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
968 rdp->nxtlist = NULL; 968 rdp->nxtlist = NULL;
969 for (i = 0; i < RCU_NEXT_SIZE; i++) 969 for (i = 0; i < RCU_NEXT_SIZE; i++)
970 rdp->nxttail[i] = &rdp->nxtlist; 970 rdp->nxttail[i] = &rdp->nxtlist;
971 rsp->orphan_qlen += rdp->qlen; 971 rsp->orphan_qlen += rdp->qlen;
972 rdp->qlen = 0; 972 rdp->qlen = 0;
973 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 973 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
974 } 974 }
975 975
976 /* 976 /*
977 * Adopt previously orphaned RCU callbacks. 977 * Adopt previously orphaned RCU callbacks.
978 */ 978 */
979 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 979 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
980 { 980 {
981 unsigned long flags; 981 unsigned long flags;
982 struct rcu_data *rdp; 982 struct rcu_data *rdp;
983 983
984 raw_spin_lock_irqsave(&rsp->onofflock, flags); 984 raw_spin_lock_irqsave(&rsp->onofflock, flags);
985 rdp = rsp->rda[smp_processor_id()]; 985 rdp = rsp->rda[smp_processor_id()];
986 if (rsp->orphan_cbs_list == NULL) { 986 if (rsp->orphan_cbs_list == NULL) {
987 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 987 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
988 return; 988 return;
989 } 989 }
990 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; 990 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
991 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; 991 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
992 rdp->qlen += rsp->orphan_qlen; 992 rdp->qlen += rsp->orphan_qlen;
993 rsp->orphan_cbs_list = NULL; 993 rsp->orphan_cbs_list = NULL;
994 rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; 994 rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
995 rsp->orphan_qlen = 0; 995 rsp->orphan_qlen = 0;
996 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 996 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
997 } 997 }
998 998
999 /* 999 /*
1000 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1000 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1001 * and move all callbacks from the outgoing CPU to the current one. 1001 * and move all callbacks from the outgoing CPU to the current one.
1002 */ 1002 */
1003 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1003 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1004 { 1004 {
1005 unsigned long flags; 1005 unsigned long flags;
1006 unsigned long mask; 1006 unsigned long mask;
1007 int need_report = 0; 1007 int need_report = 0;
1008 struct rcu_data *rdp = rsp->rda[cpu]; 1008 struct rcu_data *rdp = rsp->rda[cpu];
1009 struct rcu_node *rnp; 1009 struct rcu_node *rnp;
1010 1010
1011 /* Exclude any attempts to start a new grace period. */ 1011 /* Exclude any attempts to start a new grace period. */
1012 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1012 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1013 1013
1014 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 1014 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1015 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 1015 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
1016 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1016 mask = rdp->grpmask; /* rnp->grplo is constant. */
1017 do { 1017 do {
1018 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1018 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1019 rnp->qsmaskinit &= ~mask; 1019 rnp->qsmaskinit &= ~mask;
1020 if (rnp->qsmaskinit != 0) { 1020 if (rnp->qsmaskinit != 0) {
1021 if (rnp != rdp->mynode) 1021 if (rnp != rdp->mynode)
1022 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1022 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1023 break; 1023 break;
1024 } 1024 }
1025 if (rnp == rdp->mynode) 1025 if (rnp == rdp->mynode)
1026 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1026 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1027 else 1027 else
1028 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1028 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1029 mask = rnp->grpmask; 1029 mask = rnp->grpmask;
1030 rnp = rnp->parent; 1030 rnp = rnp->parent;
1031 } while (rnp != NULL); 1031 } while (rnp != NULL);
1032 1032
1033 /* 1033 /*
1034 * We still hold the leaf rcu_node structure lock here, and 1034 * We still hold the leaf rcu_node structure lock here, and
1035 * irqs are still disabled. The reason for this subterfuge is 1035 * irqs are still disabled. The reason for this subterfuge is
1036 * that invoking rcu_report_unblock_qs_rnp() with ->onofflock 1036 * that invoking rcu_report_unblock_qs_rnp() with ->onofflock
1037 * held leads to deadlock. 1037 * held leads to deadlock.
1038 */ 1038 */
1039 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1039 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1040 rnp = rdp->mynode; 1040 rnp = rdp->mynode;
1041 if (need_report & RCU_OFL_TASKS_NORM_GP) 1041 if (need_report & RCU_OFL_TASKS_NORM_GP)
1042 rcu_report_unblock_qs_rnp(rnp, flags); 1042 rcu_report_unblock_qs_rnp(rnp, flags);
1043 else 1043 else
1044 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1044 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1045 if (need_report & RCU_OFL_TASKS_EXP_GP) 1045 if (need_report & RCU_OFL_TASKS_EXP_GP)
1046 rcu_report_exp_rnp(rsp, rnp); 1046 rcu_report_exp_rnp(rsp, rnp);
1047 1047
1048 rcu_adopt_orphan_cbs(rsp); 1048 rcu_adopt_orphan_cbs(rsp);
1049 } 1049 }
1050 1050
1051 /* 1051 /*
1052 * Remove the specified CPU from the RCU hierarchy and move any pending 1052 * Remove the specified CPU from the RCU hierarchy and move any pending
1053 * callbacks that it might have to the current CPU. This code assumes 1053 * callbacks that it might have to the current CPU. This code assumes
1054 * that at least one CPU in the system will remain running at all times. 1054 * that at least one CPU in the system will remain running at all times.
1055 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. 1055 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
1056 */ 1056 */
1057 static void rcu_offline_cpu(int cpu) 1057 static void rcu_offline_cpu(int cpu)
1058 { 1058 {
1059 __rcu_offline_cpu(cpu, &rcu_sched_state); 1059 __rcu_offline_cpu(cpu, &rcu_sched_state);
1060 __rcu_offline_cpu(cpu, &rcu_bh_state); 1060 __rcu_offline_cpu(cpu, &rcu_bh_state);
1061 rcu_preempt_offline_cpu(cpu); 1061 rcu_preempt_offline_cpu(cpu);
1062 } 1062 }
1063 1063
1064 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 1064 #else /* #ifdef CONFIG_HOTPLUG_CPU */
1065 1065
1066 static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) 1066 static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
1067 { 1067 {
1068 } 1068 }
1069 1069
1070 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) 1070 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1071 { 1071 {
1072 } 1072 }
1073 1073
1074 static void rcu_offline_cpu(int cpu) 1074 static void rcu_offline_cpu(int cpu)
1075 { 1075 {
1076 } 1076 }
1077 1077
1078 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ 1078 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1079 1079
1080 /* 1080 /*
1081 * Invoke any RCU callbacks that have made it to the end of their grace 1081 * Invoke any RCU callbacks that have made it to the end of their grace
1082 * period. Throttle as specified by rdp->blimit. 1082 * period. Throttle as specified by rdp->blimit.
1083 */ 1083 */
1084 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 1084 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1085 { 1085 {
1086 unsigned long flags; 1086 unsigned long flags;
1087 struct rcu_head *next, *list, **tail; 1087 struct rcu_head *next, *list, **tail;
1088 int count; 1088 int count;
1089 1089
1090 /* If no callbacks are ready, just return. */ 1090 /* If no callbacks are ready, just return. */
1091 if (!cpu_has_callbacks_ready_to_invoke(rdp)) 1091 if (!cpu_has_callbacks_ready_to_invoke(rdp))
1092 return; 1092 return;
1093 1093
1094 /* 1094 /*
1095 * Extract the list of ready callbacks, disabling to prevent 1095 * Extract the list of ready callbacks, disabling to prevent
1096 * races with call_rcu() from interrupt handlers. 1096 * races with call_rcu() from interrupt handlers.
1097 */ 1097 */
1098 local_irq_save(flags); 1098 local_irq_save(flags);
1099 list = rdp->nxtlist; 1099 list = rdp->nxtlist;
1100 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1100 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1101 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1101 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1102 tail = rdp->nxttail[RCU_DONE_TAIL]; 1102 tail = rdp->nxttail[RCU_DONE_TAIL];
1103 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) 1103 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
1104 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) 1104 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
1105 rdp->nxttail[count] = &rdp->nxtlist; 1105 rdp->nxttail[count] = &rdp->nxtlist;
1106 local_irq_restore(flags); 1106 local_irq_restore(flags);
1107 1107
1108 /* Invoke callbacks. */ 1108 /* Invoke callbacks. */
1109 count = 0; 1109 count = 0;
1110 while (list) { 1110 while (list) {
1111 next = list->next; 1111 next = list->next;
1112 prefetch(next); 1112 prefetch(next);
1113 list->func(list); 1113 list->func(list);
1114 list = next; 1114 list = next;
1115 if (++count >= rdp->blimit) 1115 if (++count >= rdp->blimit)
1116 break; 1116 break;
1117 } 1117 }
1118 1118
1119 local_irq_save(flags); 1119 local_irq_save(flags);
1120 1120
1121 /* Update count, and requeue any remaining callbacks. */ 1121 /* Update count, and requeue any remaining callbacks. */
1122 rdp->qlen -= count; 1122 rdp->qlen -= count;
1123 if (list != NULL) { 1123 if (list != NULL) {
1124 *tail = rdp->nxtlist; 1124 *tail = rdp->nxtlist;
1125 rdp->nxtlist = list; 1125 rdp->nxtlist = list;
1126 for (count = 0; count < RCU_NEXT_SIZE; count++) 1126 for (count = 0; count < RCU_NEXT_SIZE; count++)
1127 if (&rdp->nxtlist == rdp->nxttail[count]) 1127 if (&rdp->nxtlist == rdp->nxttail[count])
1128 rdp->nxttail[count] = tail; 1128 rdp->nxttail[count] = tail;
1129 else 1129 else
1130 break; 1130 break;
1131 } 1131 }
1132 1132
1133 /* Reinstate batch limit if we have worked down the excess. */ 1133 /* Reinstate batch limit if we have worked down the excess. */
1134 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 1134 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1135 rdp->blimit = blimit; 1135 rdp->blimit = blimit;
1136 1136
1137 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 1137 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
1138 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 1138 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
1139 rdp->qlen_last_fqs_check = 0; 1139 rdp->qlen_last_fqs_check = 0;
1140 rdp->n_force_qs_snap = rsp->n_force_qs; 1140 rdp->n_force_qs_snap = rsp->n_force_qs;
1141 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1141 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1142 rdp->qlen_last_fqs_check = rdp->qlen; 1142 rdp->qlen_last_fqs_check = rdp->qlen;
1143 1143
1144 local_irq_restore(flags); 1144 local_irq_restore(flags);
1145 1145
1146 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1146 /* Re-raise the RCU softirq if there are callbacks remaining. */
1147 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1147 if (cpu_has_callbacks_ready_to_invoke(rdp))
1148 raise_softirq(RCU_SOFTIRQ); 1148 raise_softirq(RCU_SOFTIRQ);
1149 } 1149 }
1150 1150
1151 /* 1151 /*
1152 * Check to see if this CPU is in a non-context-switch quiescent state 1152 * Check to see if this CPU is in a non-context-switch quiescent state
1153 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1153 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1154 * Also schedule the RCU softirq handler. 1154 * Also schedule the RCU softirq handler.
1155 * 1155 *
1156 * This function must be called with hardirqs disabled. It is normally 1156 * This function must be called with hardirqs disabled. It is normally
1157 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1157 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1158 * false, there is no point in invoking rcu_check_callbacks(). 1158 * false, there is no point in invoking rcu_check_callbacks().
1159 */ 1159 */
1160 void rcu_check_callbacks(int cpu, int user) 1160 void rcu_check_callbacks(int cpu, int user)
1161 { 1161 {
1162 if (!rcu_pending(cpu)) 1162 if (!rcu_pending(cpu))
1163 return; /* if nothing for RCU to do. */ 1163 return; /* if nothing for RCU to do. */
1164 if (user || 1164 if (user ||
1165 (idle_cpu(cpu) && rcu_scheduler_active && 1165 (idle_cpu(cpu) && rcu_scheduler_active &&
1166 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 1166 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
1167 1167
1168 /* 1168 /*
1169 * Get here if this CPU took its interrupt from user 1169 * Get here if this CPU took its interrupt from user
1170 * mode or from the idle loop, and if this is not a 1170 * mode or from the idle loop, and if this is not a
1171 * nested interrupt. In this case, the CPU is in 1171 * nested interrupt. In this case, the CPU is in
1172 * a quiescent state, so note it. 1172 * a quiescent state, so note it.
1173 * 1173 *
1174 * No memory barrier is required here because both 1174 * No memory barrier is required here because both
1175 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local 1175 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
1176 * variables that other CPUs neither access nor modify, 1176 * variables that other CPUs neither access nor modify,
1177 * at least not while the corresponding CPU is online. 1177 * at least not while the corresponding CPU is online.
1178 */ 1178 */
1179 1179
1180 rcu_sched_qs(cpu); 1180 rcu_sched_qs(cpu);
1181 rcu_bh_qs(cpu); 1181 rcu_bh_qs(cpu);
1182 1182
1183 } else if (!in_softirq()) { 1183 } else if (!in_softirq()) {
1184 1184
1185 /* 1185 /*
1186 * Get here if this CPU did not take its interrupt from 1186 * Get here if this CPU did not take its interrupt from
1187 * softirq, in other words, if it is not interrupting 1187 * softirq, in other words, if it is not interrupting
1188 * a rcu_bh read-side critical section. This is an _bh 1188 * a rcu_bh read-side critical section. This is an _bh
1189 * critical section, so note it. 1189 * critical section, so note it.
1190 */ 1190 */
1191 1191
1192 rcu_bh_qs(cpu); 1192 rcu_bh_qs(cpu);
1193 } 1193 }
1194 rcu_preempt_check_callbacks(cpu); 1194 rcu_preempt_check_callbacks(cpu);
1195 raise_softirq(RCU_SOFTIRQ); 1195 raise_softirq(RCU_SOFTIRQ);
1196 } 1196 }
1197 1197
1198 #ifdef CONFIG_SMP 1198 #ifdef CONFIG_SMP
1199 1199
1200 /* 1200 /*
1201 * Scan the leaf rcu_node structures, processing dyntick state for any that 1201 * Scan the leaf rcu_node structures, processing dyntick state for any that
1202 * have not yet encountered a quiescent state, using the function specified. 1202 * have not yet encountered a quiescent state, using the function specified.
1203 * The caller must have suppressed start of new grace periods. 1203 * The caller must have suppressed start of new grace periods.
1204 */ 1204 */
1205 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1205 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1206 { 1206 {
1207 unsigned long bit; 1207 unsigned long bit;
1208 int cpu; 1208 int cpu;
1209 unsigned long flags; 1209 unsigned long flags;
1210 unsigned long mask; 1210 unsigned long mask;
1211 struct rcu_node *rnp; 1211 struct rcu_node *rnp;
1212 1212
1213 rcu_for_each_leaf_node(rsp, rnp) { 1213 rcu_for_each_leaf_node(rsp, rnp) {
1214 mask = 0; 1214 mask = 0;
1215 raw_spin_lock_irqsave(&rnp->lock, flags); 1215 raw_spin_lock_irqsave(&rnp->lock, flags);
1216 if (!rcu_gp_in_progress(rsp)) { 1216 if (!rcu_gp_in_progress(rsp)) {
1217 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1217 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1218 return; 1218 return;
1219 } 1219 }
1220 if (rnp->qsmask == 0) { 1220 if (rnp->qsmask == 0) {
1221 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1221 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1222 continue; 1222 continue;
1223 } 1223 }
1224 cpu = rnp->grplo; 1224 cpu = rnp->grplo;
1225 bit = 1; 1225 bit = 1;
1226 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1226 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1227 if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) 1227 if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
1228 mask |= bit; 1228 mask |= bit;
1229 } 1229 }
1230 if (mask != 0) { 1230 if (mask != 0) {
1231 1231
1232 /* rcu_report_qs_rnp() releases rnp->lock. */ 1232 /* rcu_report_qs_rnp() releases rnp->lock. */
1233 rcu_report_qs_rnp(mask, rsp, rnp, flags); 1233 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1234 continue; 1234 continue;
1235 } 1235 }
1236 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1236 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1237 } 1237 }
1238 } 1238 }
1239 1239
1240 /* 1240 /*
1241 * Force quiescent states on reluctant CPUs, and also detect which 1241 * Force quiescent states on reluctant CPUs, and also detect which
1242 * CPUs are in dyntick-idle mode. 1242 * CPUs are in dyntick-idle mode.
1243 */ 1243 */
1244 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1244 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1245 { 1245 {
1246 unsigned long flags; 1246 unsigned long flags;
1247 struct rcu_node *rnp = rcu_get_root(rsp); 1247 struct rcu_node *rnp = rcu_get_root(rsp);
1248 1248
1249 if (!rcu_gp_in_progress(rsp)) 1249 if (!rcu_gp_in_progress(rsp))
1250 return; /* No grace period in progress, nothing to force. */ 1250 return; /* No grace period in progress, nothing to force. */
1251 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { 1251 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1252 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1252 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1253 return; /* Someone else is already on the job. */ 1253 return; /* Someone else is already on the job. */
1254 } 1254 }
1255 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) 1255 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
1256 goto unlock_fqs_ret; /* no emergency and done recently. */ 1256 goto unlock_fqs_ret; /* no emergency and done recently. */
1257 rsp->n_force_qs++; 1257 rsp->n_force_qs++;
1258 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1258 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1259 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1259 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1260 if (!rcu_gp_in_progress(rsp)) { 1260 if (!rcu_gp_in_progress(rsp)) {
1261 rsp->n_force_qs_ngp++; 1261 rsp->n_force_qs_ngp++;
1262 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1262 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1263 goto unlock_fqs_ret; /* no GP in progress, time updated. */ 1263 goto unlock_fqs_ret; /* no GP in progress, time updated. */
1264 } 1264 }
1265 rsp->fqs_active = 1; 1265 rsp->fqs_active = 1;
1266 switch (rsp->signaled) { 1266 switch (rsp->signaled) {
1267 case RCU_GP_IDLE: 1267 case RCU_GP_IDLE:
1268 case RCU_GP_INIT: 1268 case RCU_GP_INIT:
1269 1269
1270 break; /* grace period idle or initializing, ignore. */ 1270 break; /* grace period idle or initializing, ignore. */
1271 1271
1272 case RCU_SAVE_DYNTICK: 1272 case RCU_SAVE_DYNTICK:
1273 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) 1273 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1274 break; /* So gcc recognizes the dead code. */ 1274 break; /* So gcc recognizes the dead code. */
1275 1275
1276 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1276 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1277 1277
1278 /* Record dyntick-idle state. */ 1278 /* Record dyntick-idle state. */
1279 force_qs_rnp(rsp, dyntick_save_progress_counter); 1279 force_qs_rnp(rsp, dyntick_save_progress_counter);
1280 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1280 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1281 if (rcu_gp_in_progress(rsp)) 1281 if (rcu_gp_in_progress(rsp))
1282 rsp->signaled = RCU_FORCE_QS; 1282 rsp->signaled = RCU_FORCE_QS;
1283 break; 1283 break;
1284 1284
1285 case RCU_FORCE_QS: 1285 case RCU_FORCE_QS:
1286 1286
1287 /* Check dyntick-idle state, send IPI to laggards. */ 1287 /* Check dyntick-idle state, send IPI to laggards. */
1288 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1288 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1289 force_qs_rnp(rsp, rcu_implicit_dynticks_qs); 1289 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
1290 1290
1291 /* Leave state in case more forcing is required. */ 1291 /* Leave state in case more forcing is required. */
1292 1292
1293 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1293 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1294 break; 1294 break;
1295 } 1295 }
1296 rsp->fqs_active = 0; 1296 rsp->fqs_active = 0;
1297 if (rsp->fqs_need_gp) { 1297 if (rsp->fqs_need_gp) {
1298 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ 1298 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1299 rsp->fqs_need_gp = 0; 1299 rsp->fqs_need_gp = 0;
1300 rcu_start_gp(rsp, flags); /* releases rnp->lock */ 1300 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1301 return; 1301 return;
1302 } 1302 }
1303 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1303 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1304 unlock_fqs_ret: 1304 unlock_fqs_ret:
1305 raw_spin_unlock_irqrestore(&rsp->fqslock, flags); 1305 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1306 } 1306 }
1307 1307
1308 #else /* #ifdef CONFIG_SMP */ 1308 #else /* #ifdef CONFIG_SMP */
1309 1309
1310 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1310 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1311 { 1311 {
1312 set_need_resched(); 1312 set_need_resched();
1313 } 1313 }
1314 1314
1315 #endif /* #else #ifdef CONFIG_SMP */ 1315 #endif /* #else #ifdef CONFIG_SMP */
1316 1316
1317 /* 1317 /*
1318 * This does the RCU processing work from softirq context for the 1318 * This does the RCU processing work from softirq context for the
1319 * specified rcu_state and rcu_data structures. This may be called 1319 * specified rcu_state and rcu_data structures. This may be called
1320 * only from the CPU to whom the rdp belongs. 1320 * only from the CPU to whom the rdp belongs.
1321 */ 1321 */
1322 static void 1322 static void
1323 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1323 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1324 { 1324 {
1325 unsigned long flags; 1325 unsigned long flags;
1326 1326
1327 WARN_ON_ONCE(rdp->beenonline == 0); 1327 WARN_ON_ONCE(rdp->beenonline == 0);
1328 1328
1329 /* 1329 /*
1330 * If an RCU GP has gone long enough, go check for dyntick 1330 * If an RCU GP has gone long enough, go check for dyntick
1331 * idle CPUs and, if needed, send resched IPIs. 1331 * idle CPUs and, if needed, send resched IPIs.
1332 */ 1332 */
1333 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1333 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1334 force_quiescent_state(rsp, 1); 1334 force_quiescent_state(rsp, 1);
1335 1335
1336 /* 1336 /*
1337 * Advance callbacks in response to end of earlier grace 1337 * Advance callbacks in response to end of earlier grace
1338 * period that some other CPU ended. 1338 * period that some other CPU ended.
1339 */ 1339 */
1340 rcu_process_gp_end(rsp, rdp); 1340 rcu_process_gp_end(rsp, rdp);
1341 1341
1342 /* Update RCU state based on any recent quiescent states. */ 1342 /* Update RCU state based on any recent quiescent states. */
1343 rcu_check_quiescent_state(rsp, rdp); 1343 rcu_check_quiescent_state(rsp, rdp);
1344 1344
1345 /* Does this CPU require a not-yet-started grace period? */ 1345 /* Does this CPU require a not-yet-started grace period? */
1346 if (cpu_needs_another_gp(rsp, rdp)) { 1346 if (cpu_needs_another_gp(rsp, rdp)) {
1347 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 1347 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
1348 rcu_start_gp(rsp, flags); /* releases above lock */ 1348 rcu_start_gp(rsp, flags); /* releases above lock */
1349 } 1349 }
1350 1350
1351 /* If there are callbacks ready, invoke them. */ 1351 /* If there are callbacks ready, invoke them. */
1352 rcu_do_batch(rsp, rdp); 1352 rcu_do_batch(rsp, rdp);
1353 } 1353 }
1354 1354
1355 /* 1355 /*
1356 * Do softirq processing for the current CPU. 1356 * Do softirq processing for the current CPU.
1357 */ 1357 */
1358 static void rcu_process_callbacks(struct softirq_action *unused) 1358 static void rcu_process_callbacks(struct softirq_action *unused)
1359 { 1359 {
1360 /* 1360 /*
1361 * Memory references from any prior RCU read-side critical sections 1361 * Memory references from any prior RCU read-side critical sections
1362 * executed by the interrupted code must be seen before any RCU 1362 * executed by the interrupted code must be seen before any RCU
1363 * grace-period manipulations below. 1363 * grace-period manipulations below.
1364 */ 1364 */
1365 smp_mb(); /* See above block comment. */ 1365 smp_mb(); /* See above block comment. */
1366 1366
1367 __rcu_process_callbacks(&rcu_sched_state, 1367 __rcu_process_callbacks(&rcu_sched_state,
1368 &__get_cpu_var(rcu_sched_data)); 1368 &__get_cpu_var(rcu_sched_data));
1369 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1369 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1370 rcu_preempt_process_callbacks(); 1370 rcu_preempt_process_callbacks();
1371 1371
1372 /* 1372 /*
1373 * Memory references from any later RCU read-side critical sections 1373 * Memory references from any later RCU read-side critical sections
1374 * executed by the interrupted code must be seen after any RCU 1374 * executed by the interrupted code must be seen after any RCU
1375 * grace-period manipulations above. 1375 * grace-period manipulations above.
1376 */ 1376 */
1377 smp_mb(); /* See above block comment. */ 1377 smp_mb(); /* See above block comment. */
1378 1378
1379 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ 1379 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1380 rcu_needs_cpu_flush(); 1380 rcu_needs_cpu_flush();
1381 } 1381 }
1382 1382
1383 static void 1383 static void
1384 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1384 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1385 struct rcu_state *rsp) 1385 struct rcu_state *rsp)
1386 { 1386 {
1387 unsigned long flags; 1387 unsigned long flags;
1388 struct rcu_data *rdp; 1388 struct rcu_data *rdp;
1389 1389
1390 head->func = func; 1390 head->func = func;
1391 head->next = NULL; 1391 head->next = NULL;
1392 1392
1393 smp_mb(); /* Ensure RCU update seen before callback registry. */ 1393 smp_mb(); /* Ensure RCU update seen before callback registry. */
1394 1394
1395 /* 1395 /*
1396 * Opportunistically note grace-period endings and beginnings. 1396 * Opportunistically note grace-period endings and beginnings.
1397 * Note that we might see a beginning right after we see an 1397 * Note that we might see a beginning right after we see an
1398 * end, but never vice versa, since this CPU has to pass through 1398 * end, but never vice versa, since this CPU has to pass through
1399 * a quiescent state betweentimes. 1399 * a quiescent state betweentimes.
1400 */ 1400 */
1401 local_irq_save(flags); 1401 local_irq_save(flags);
1402 rdp = rsp->rda[smp_processor_id()]; 1402 rdp = rsp->rda[smp_processor_id()];
1403 rcu_process_gp_end(rsp, rdp); 1403 rcu_process_gp_end(rsp, rdp);
1404 check_for_new_grace_period(rsp, rdp); 1404 check_for_new_grace_period(rsp, rdp);
1405 1405
1406 /* Add the callback to our list. */ 1406 /* Add the callback to our list. */
1407 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1407 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1408 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1408 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1409 1409
1410 /* Start a new grace period if one not already started. */ 1410 /* Start a new grace period if one not already started. */
1411 if (!rcu_gp_in_progress(rsp)) { 1411 if (!rcu_gp_in_progress(rsp)) {
1412 unsigned long nestflag; 1412 unsigned long nestflag;
1413 struct rcu_node *rnp_root = rcu_get_root(rsp); 1413 struct rcu_node *rnp_root = rcu_get_root(rsp);
1414 1414
1415 raw_spin_lock_irqsave(&rnp_root->lock, nestflag); 1415 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1416 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ 1416 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1417 } 1417 }
1418 1418
1419 /* 1419 /*
1420 * Force the grace period if too many callbacks or too long waiting. 1420 * Force the grace period if too many callbacks or too long waiting.
1421 * Enforce hysteresis, and don't invoke force_quiescent_state() 1421 * Enforce hysteresis, and don't invoke force_quiescent_state()
1422 * if some other CPU has recently done so. Also, don't bother 1422 * if some other CPU has recently done so. Also, don't bother
1423 * invoking force_quiescent_state() if the newly enqueued callback 1423 * invoking force_quiescent_state() if the newly enqueued callback
1424 * is the only one waiting for a grace period to complete. 1424 * is the only one waiting for a grace period to complete.
1425 */ 1425 */
1426 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 1426 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1427 rdp->blimit = LONG_MAX; 1427 rdp->blimit = LONG_MAX;
1428 if (rsp->n_force_qs == rdp->n_force_qs_snap && 1428 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1429 *rdp->nxttail[RCU_DONE_TAIL] != head) 1429 *rdp->nxttail[RCU_DONE_TAIL] != head)
1430 force_quiescent_state(rsp, 0); 1430 force_quiescent_state(rsp, 0);
1431 rdp->n_force_qs_snap = rsp->n_force_qs; 1431 rdp->n_force_qs_snap = rsp->n_force_qs;
1432 rdp->qlen_last_fqs_check = rdp->qlen; 1432 rdp->qlen_last_fqs_check = rdp->qlen;
1433 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1433 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1434 force_quiescent_state(rsp, 1); 1434 force_quiescent_state(rsp, 1);
1435 local_irq_restore(flags); 1435 local_irq_restore(flags);
1436 } 1436 }
1437 1437
1438 /* 1438 /*
1439 * Queue an RCU-sched callback for invocation after a grace period. 1439 * Queue an RCU-sched callback for invocation after a grace period.
1440 */ 1440 */
1441 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1441 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1442 { 1442 {
1443 __call_rcu(head, func, &rcu_sched_state); 1443 __call_rcu(head, func, &rcu_sched_state);
1444 } 1444 }
1445 EXPORT_SYMBOL_GPL(call_rcu_sched); 1445 EXPORT_SYMBOL_GPL(call_rcu_sched);
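
[Editorial usage illustration, not part of this patch: a minimal sketch of how a caller typically drives call_rcu_sched(). The struct foo, foo_reclaim() and foo_remove() names are hypothetical; the point is that the rcu_head is embedded in the protected object so the callback can recover it with container_of().]

#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct foo {				/* hypothetical RCU-protected element */
	struct list_head list;
	struct rcu_head rcu;		/* embedded so the callback can find the object */
	int data;
};

static void foo_reclaim(struct rcu_head *rcu)
{
	struct foo *fp = container_of(rcu, struct foo, rcu);

	kfree(fp);			/* safe: a full rcu-sched grace period has elapsed */
}

static void foo_remove(struct foo *fp)	/* caller holds the update-side lock */
{
	list_del_rcu(&fp->list);		/* unlink; readers may still hold references */
	call_rcu_sched(&fp->rcu, foo_reclaim);	/* defer the free past a grace period */
}
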
1446 1446
1447 /* 1447 /*
1448 * Queue an RCU callback for invocation after a quicker grace period. 1448 * Queue an RCU callback for invocation after a quicker grace period.
1449 */ 1449 */
1450 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1450 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1451 { 1451 {
1452 __call_rcu(head, func, &rcu_bh_state); 1452 __call_rcu(head, func, &rcu_bh_state);
1453 } 1453 }
1454 EXPORT_SYMBOL_GPL(call_rcu_bh); 1454 EXPORT_SYMBOL_GPL(call_rcu_bh);
1455 1455
1456 /** 1456 /**
1457 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 1457 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1458 * 1458 *
1459 * Control will return to the caller some time after a full rcu-sched 1459 * Control will return to the caller some time after a full rcu-sched
1460 * grace period has elapsed, in other words after all currently executing 1460 * grace period has elapsed, in other words after all currently executing
1461 * rcu-sched read-side critical sections have completed. These read-side 1461 * rcu-sched read-side critical sections have completed. These read-side
1462 * critical sections are delimited by rcu_read_lock_sched() and 1462 * critical sections are delimited by rcu_read_lock_sched() and
1463 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 1463 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
1464 * local_irq_disable(), and so on may be used in place of 1464 * local_irq_disable(), and so on may be used in place of
1465 * rcu_read_lock_sched(). 1465 * rcu_read_lock_sched().
1466 * 1466 *
1467 * This means that all preempt_disable code sequences, including NMI and 1467 * This means that all preempt_disable code sequences, including NMI and
1468 * hardware-interrupt handlers, in progress on entry will have completed 1468 * hardware-interrupt handlers, in progress on entry will have completed
1469 * before this primitive returns. However, this does not guarantee that 1469 * before this primitive returns. However, this does not guarantee that
1470 * softirq handlers will have completed, since in some kernels, these 1470 * softirq handlers will have completed, since in some kernels, these
1471 * handlers can run in process context, and can block. 1471 * handlers can run in process context, and can block.
1472 * 1472 *
1473 * This primitive provides the guarantees made by the (now removed) 1473 * This primitive provides the guarantees made by the (now removed)
1474 * synchronize_kernel() API. In contrast, synchronize_rcu() only 1474 * synchronize_kernel() API. In contrast, synchronize_rcu() only
1475 * guarantees that rcu_read_lock() sections will have completed. 1475 * guarantees that rcu_read_lock() sections will have completed.
1476 * In "classic RCU", these two guarantees happen to be one and 1476 * In "classic RCU", these two guarantees happen to be one and
1477 * the same, but can differ in realtime RCU implementations. 1477 * the same, but can differ in realtime RCU implementations.
1478 */ 1478 */
1479 void synchronize_sched(void) 1479 void synchronize_sched(void)
1480 { 1480 {
1481 struct rcu_synchronize rcu; 1481 struct rcu_synchronize rcu;
1482 1482
1483 if (rcu_blocking_is_gp()) 1483 if (rcu_blocking_is_gp())
1484 return; 1484 return;
1485 1485
1486 init_completion(&rcu.completion); 1486 init_completion(&rcu.completion);
1487 /* Will wake me after RCU finished. */ 1487 /* Will wake me after RCU finished. */
1488 call_rcu_sched(&rcu.head, wakeme_after_rcu); 1488 call_rcu_sched(&rcu.head, wakeme_after_rcu);
1489 /* Wait for it. */ 1489 /* Wait for it. */
1490 wait_for_completion(&rcu.completion); 1490 wait_for_completion(&rcu.completion);
1491 } 1491 }
1492 EXPORT_SYMBOL_GPL(synchronize_sched); 1492 EXPORT_SYMBOL_GPL(synchronize_sched);
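
[Editorial sketch for comparison with the callback form above: the blocking updater pattern that synchronize_sched() enables. foo_lock and struct foo are illustrative assumptions carried over from the earlier sketch.]

static DEFINE_SPINLOCK(foo_lock);	/* hypothetical update-side lock */

static void foo_remove_sync(struct foo *fp)
{
	spin_lock(&foo_lock);
	list_del_rcu(&fp->list);	/* readers traversing the list may still see fp */
	spin_unlock(&foo_lock);

	synchronize_sched();		/* wait out all preempt-disabled / rcu_read_lock_sched() readers */
	kfree(fp);			/* no reader can still hold a reference */
}
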
1493 1493
1494 /** 1494 /**
1495 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. 1495 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
1496 * 1496 *
1497 * Control will return to the caller some time after a full rcu_bh grace 1497 * Control will return to the caller some time after a full rcu_bh grace
1498 * period has elapsed, in other words after all currently executing rcu_bh 1498 * period has elapsed, in other words after all currently executing rcu_bh
1499 * read-side critical sections have completed. RCU read-side critical 1499 * read-side critical sections have completed. RCU read-side critical
1500 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), 1500 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
1501 * and may be nested. 1501 * and may be nested.
1502 */ 1502 */
1503 void synchronize_rcu_bh(void) 1503 void synchronize_rcu_bh(void)
1504 { 1504 {
1505 struct rcu_synchronize rcu; 1505 struct rcu_synchronize rcu;
1506 1506
1507 if (rcu_blocking_is_gp()) 1507 if (rcu_blocking_is_gp())
1508 return; 1508 return;
1509 1509
1510 init_completion(&rcu.completion); 1510 init_completion(&rcu.completion);
1511 /* Will wake me after RCU finished. */ 1511 /* Will wake me after RCU finished. */
1512 call_rcu_bh(&rcu.head, wakeme_after_rcu); 1512 call_rcu_bh(&rcu.head, wakeme_after_rcu);
1513 /* Wait for it. */ 1513 /* Wait for it. */
1514 wait_for_completion(&rcu.completion); 1514 wait_for_completion(&rcu.completion);
1515 } 1515 }
1516 EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1516 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
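
[Editorial sketch of the matching read side that synchronize_rcu_bh() and call_rcu_bh() wait for. The gp pointer and struct foo are placeholders; rcu_dereference_bh() is assumed to be available as the lockdep-aware accessor in this tree, with plain rcu_dereference() as the fallback.]

static struct foo *gp;			/* hypothetical pointer, published via rcu_assign_pointer() */

static int foo_read_bh(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock_bh();			/* begin _bh read-side critical section */
	p = rcu_dereference_bh(gp);		/* fetch the _bh-protected pointer */
	if (p)
		val = p->data;			/* use the object only inside the section */
	rcu_read_unlock_bh();
	return val;
}
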
1517 1517
1518 /* 1518 /*
1519 * Check to see if there is any immediate RCU-related work to be done 1519 * Check to see if there is any immediate RCU-related work to be done
1520 * by the current CPU, for the specified type of RCU, returning 1 if so. 1520 * by the current CPU, for the specified type of RCU, returning 1 if so.
1521 * The checks are in order of increasing expense: checks that can be 1521 * The checks are in order of increasing expense: checks that can be
1522 * carried out against CPU-local state are performed first. However, 1522 * carried out against CPU-local state are performed first. However,
1523 * we must check for CPU stalls first, else we might not get a chance. 1523 * we must check for CPU stalls first, else we might not get a chance.
1524 */ 1524 */
1525 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 1525 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1526 { 1526 {
1527 struct rcu_node *rnp = rdp->mynode; 1527 struct rcu_node *rnp = rdp->mynode;
1528 1528
1529 rdp->n_rcu_pending++; 1529 rdp->n_rcu_pending++;
1530 1530
1531 /* Check for CPU stalls, if enabled. */ 1531 /* Check for CPU stalls, if enabled. */
1532 check_cpu_stall(rsp, rdp); 1532 check_cpu_stall(rsp, rdp);
1533 1533
1534 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1534 /* Is the RCU core waiting for a quiescent state from this CPU? */
1535 if (rdp->qs_pending) { 1535 if (rdp->qs_pending) {
1536 1536
1537 /* 1537 /*
1538 * If force_quiescent_state() is coming soon and this CPU 1538 * If force_quiescent_state() is coming soon and this CPU
1539 * needs a quiescent state, and this is either RCU-sched 1539 * needs a quiescent state, and this is either RCU-sched
1540 * or RCU-bh, force a local reschedule. 1540 * or RCU-bh, force a local reschedule.
1541 */ 1541 */
1542 if (!rdp->preemptable && 1542 if (!rdp->preemptable &&
1543 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1543 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1544 jiffies)) 1544 jiffies))
1545 set_need_resched(); 1545 set_need_resched();
1546 rdp->n_rp_qs_pending++; 1546 rdp->n_rp_qs_pending++;
1547 return 1; 1547 return 1;
1548 } 1548 }
1549 1549
1550 /* Does this CPU have callbacks ready to invoke? */ 1550 /* Does this CPU have callbacks ready to invoke? */
1551 if (cpu_has_callbacks_ready_to_invoke(rdp)) { 1551 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
1552 rdp->n_rp_cb_ready++; 1552 rdp->n_rp_cb_ready++;
1553 return 1; 1553 return 1;
1554 } 1554 }
1555 1555
1556 /* Has RCU gone idle with this CPU needing another grace period? */ 1556 /* Has RCU gone idle with this CPU needing another grace period? */
1557 if (cpu_needs_another_gp(rsp, rdp)) { 1557 if (cpu_needs_another_gp(rsp, rdp)) {
1558 rdp->n_rp_cpu_needs_gp++; 1558 rdp->n_rp_cpu_needs_gp++;
1559 return 1; 1559 return 1;
1560 } 1560 }
1561 1561
1562 /* Has another RCU grace period completed? */ 1562 /* Has another RCU grace period completed? */
1563 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 1563 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
1564 rdp->n_rp_gp_completed++; 1564 rdp->n_rp_gp_completed++;
1565 return 1; 1565 return 1;
1566 } 1566 }
1567 1567
1568 /* Has a new RCU grace period started? */ 1568 /* Has a new RCU grace period started? */
1569 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 1569 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
1570 rdp->n_rp_gp_started++; 1570 rdp->n_rp_gp_started++;
1571 return 1; 1571 return 1;
1572 } 1572 }
1573 1573
1574 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1574 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1575 if (rcu_gp_in_progress(rsp) && 1575 if (rcu_gp_in_progress(rsp) &&
1576 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { 1576 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
1577 rdp->n_rp_need_fqs++; 1577 rdp->n_rp_need_fqs++;
1578 return 1; 1578 return 1;
1579 } 1579 }
1580 1580
1581 /* nothing to do */ 1581 /* nothing to do */
1582 rdp->n_rp_need_nothing++; 1582 rdp->n_rp_need_nothing++;
1583 return 0; 1583 return 0;
1584 } 1584 }
1585 1585
1586 /* 1586 /*
1587 * Check to see if there is any immediate RCU-related work to be done 1587 * Check to see if there is any immediate RCU-related work to be done
1588 * by the current CPU, returning 1 if so. This function is part of the 1588 * by the current CPU, returning 1 if so. This function is part of the
1589 * RCU implementation; it is -not- an exported member of the RCU API. 1589 * RCU implementation; it is -not- an exported member of the RCU API.
1590 */ 1590 */
1591 static int rcu_pending(int cpu) 1591 static int rcu_pending(int cpu)
1592 { 1592 {
1593 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || 1593 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
1594 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || 1594 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
1595 rcu_preempt_pending(cpu); 1595 rcu_preempt_pending(cpu);
1596 } 1596 }
1597 1597
1598 /* 1598 /*
1599 * Check to see if any future RCU-related work will need to be done 1599 * Check to see if any future RCU-related work will need to be done
1600 * by the current CPU, even if none need be done immediately, returning 1600 * by the current CPU, even if none need be done immediately, returning
1601 * 1 if so. 1601 * 1 if so.
1602 */ 1602 */
1603 static int rcu_needs_cpu_quick_check(int cpu) 1603 static int rcu_needs_cpu_quick_check(int cpu)
1604 { 1604 {
1605 /* RCU callbacks either ready or pending? */ 1605 /* RCU callbacks either ready or pending? */
1606 return per_cpu(rcu_sched_data, cpu).nxtlist || 1606 return per_cpu(rcu_sched_data, cpu).nxtlist ||
1607 per_cpu(rcu_bh_data, cpu).nxtlist || 1607 per_cpu(rcu_bh_data, cpu).nxtlist ||
1608 rcu_preempt_needs_cpu(cpu); 1608 rcu_preempt_needs_cpu(cpu);
1609 } 1609 }
1610 1610
1611 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 1611 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
1612 static atomic_t rcu_barrier_cpu_count; 1612 static atomic_t rcu_barrier_cpu_count;
1613 static DEFINE_MUTEX(rcu_barrier_mutex); 1613 static DEFINE_MUTEX(rcu_barrier_mutex);
1614 static struct completion rcu_barrier_completion; 1614 static struct completion rcu_barrier_completion;
1615 1615
1616 static void rcu_barrier_callback(struct rcu_head *notused) 1616 static void rcu_barrier_callback(struct rcu_head *notused)
1617 { 1617 {
1618 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1618 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
1619 complete(&rcu_barrier_completion); 1619 complete(&rcu_barrier_completion);
1620 } 1620 }
1621 1621
1622 /* 1622 /*
1623 * Called with preemption disabled, and from cross-cpu IRQ context. 1623 * Called with preemption disabled, and from cross-cpu IRQ context.
1624 */ 1624 */
1625 static void rcu_barrier_func(void *type) 1625 static void rcu_barrier_func(void *type)
1626 { 1626 {
1627 int cpu = smp_processor_id(); 1627 int cpu = smp_processor_id();
1628 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 1628 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
1629 void (*call_rcu_func)(struct rcu_head *head, 1629 void (*call_rcu_func)(struct rcu_head *head,
1630 void (*func)(struct rcu_head *head)); 1630 void (*func)(struct rcu_head *head));
1631 1631
1632 atomic_inc(&rcu_barrier_cpu_count); 1632 atomic_inc(&rcu_barrier_cpu_count);
1633 call_rcu_func = type; 1633 call_rcu_func = type;
1634 call_rcu_func(head, rcu_barrier_callback); 1634 call_rcu_func(head, rcu_barrier_callback);
1635 } 1635 }
1636 1636
1637 /* 1637 /*
1638 * Orchestrate the specified type of RCU barrier, waiting for all 1638 * Orchestrate the specified type of RCU barrier, waiting for all
1639 * RCU callbacks of the specified type to complete. 1639 * RCU callbacks of the specified type to complete.
1640 */ 1640 */
1641 static void _rcu_barrier(struct rcu_state *rsp, 1641 static void _rcu_barrier(struct rcu_state *rsp,
1642 void (*call_rcu_func)(struct rcu_head *head, 1642 void (*call_rcu_func)(struct rcu_head *head,
1643 void (*func)(struct rcu_head *head))) 1643 void (*func)(struct rcu_head *head)))
1644 { 1644 {
1645 BUG_ON(in_interrupt()); 1645 BUG_ON(in_interrupt());
1646 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 1646 /* Take mutex to serialize concurrent rcu_barrier() requests. */
1647 mutex_lock(&rcu_barrier_mutex); 1647 mutex_lock(&rcu_barrier_mutex);
1648 init_completion(&rcu_barrier_completion); 1648 init_completion(&rcu_barrier_completion);
1649 /* 1649 /*
1650 * Initialize rcu_barrier_cpu_count to 1, then invoke 1650 * Initialize rcu_barrier_cpu_count to 1, then invoke
1651 * rcu_barrier_func() on each CPU, so that each CPU also has 1651 * rcu_barrier_func() on each CPU, so that each CPU also has
1652 * incremented rcu_barrier_cpu_count. Only then is it safe to 1652 * incremented rcu_barrier_cpu_count. Only then is it safe to
1653 * decrement rcu_barrier_cpu_count -- otherwise the first CPU 1653 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
1654 * might complete its grace period before all of the other CPUs 1654 * might complete its grace period before all of the other CPUs
1655 * did their increment, causing this function to return too 1655 * did their increment, causing this function to return too
1656 * early. 1656 * early.
1657 */ 1657 */
1658 atomic_set(&rcu_barrier_cpu_count, 1); 1658 atomic_set(&rcu_barrier_cpu_count, 1);
1659 preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ 1659 preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
1660 rcu_adopt_orphan_cbs(rsp); 1660 rcu_adopt_orphan_cbs(rsp);
1661 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); 1661 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
1662 preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ 1662 preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
1663 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1663 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
1664 complete(&rcu_barrier_completion); 1664 complete(&rcu_barrier_completion);
1665 wait_for_completion(&rcu_barrier_completion); 1665 wait_for_completion(&rcu_barrier_completion);
1666 mutex_unlock(&rcu_barrier_mutex); 1666 mutex_unlock(&rcu_barrier_mutex);
1667 } 1667 }
1668 1668
1669 /** 1669 /**
1670 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 1670 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
1671 */ 1671 */
1672 void rcu_barrier_bh(void) 1672 void rcu_barrier_bh(void)
1673 { 1673 {
1674 _rcu_barrier(&rcu_bh_state, call_rcu_bh); 1674 _rcu_barrier(&rcu_bh_state, call_rcu_bh);
1675 } 1675 }
1676 EXPORT_SYMBOL_GPL(rcu_barrier_bh); 1676 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
1677 1677
1678 /** 1678 /**
1679 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 1679 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
1680 */ 1680 */
1681 void rcu_barrier_sched(void) 1681 void rcu_barrier_sched(void)
1682 { 1682 {
1683 _rcu_barrier(&rcu_sched_state, call_rcu_sched); 1683 _rcu_barrier(&rcu_sched_state, call_rcu_sched);
1684 } 1684 }
1685 EXPORT_SYMBOL_GPL(rcu_barrier_sched); 1685 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
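
[Editorial sketch of a common use of these barrier primitives, assuming a hypothetical module exit path and the usual module boilerplate: stop posting new callbacks first, then wait for those already queued so the callback function cannot run after the module text is gone.]

static void __exit foo_exit(void)
{
	foo_stop_queueing();	/* hypothetical: guarantee no further call_rcu_sched() invocations */
	rcu_barrier_sched();	/* wait for every already-queued rcu-sched callback to finish */
	/* foo_reclaim() can no longer run, so it is now safe to unload this code. */
}
module_exit(foo_exit);
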
1686 1686
1687 /* 1687 /*
1688 * Do boot-time initialization of a CPU's per-CPU RCU data. 1688 * Do boot-time initialization of a CPU's per-CPU RCU data.
1689 */ 1689 */
1690 static void __init 1690 static void __init
1691 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 1691 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1692 { 1692 {
1693 unsigned long flags; 1693 unsigned long flags;
1694 int i; 1694 int i;
1695 struct rcu_data *rdp = rsp->rda[cpu]; 1695 struct rcu_data *rdp = rsp->rda[cpu];
1696 struct rcu_node *rnp = rcu_get_root(rsp); 1696 struct rcu_node *rnp = rcu_get_root(rsp);
1697 1697
1698 /* Set up local state, ensuring consistent view of global state. */ 1698 /* Set up local state, ensuring consistent view of global state. */
1699 raw_spin_lock_irqsave(&rnp->lock, flags); 1699 raw_spin_lock_irqsave(&rnp->lock, flags);
1700 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 1700 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
1701 rdp->nxtlist = NULL; 1701 rdp->nxtlist = NULL;
1702 for (i = 0; i < RCU_NEXT_SIZE; i++) 1702 for (i = 0; i < RCU_NEXT_SIZE; i++)
1703 rdp->nxttail[i] = &rdp->nxtlist; 1703 rdp->nxttail[i] = &rdp->nxtlist;
1704 rdp->qlen = 0; 1704 rdp->qlen = 0;
1705 #ifdef CONFIG_NO_HZ 1705 #ifdef CONFIG_NO_HZ
1706 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 1706 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1707 #endif /* #ifdef CONFIG_NO_HZ */ 1707 #endif /* #ifdef CONFIG_NO_HZ */
1708 rdp->cpu = cpu; 1708 rdp->cpu = cpu;
1709 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1709 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1710 } 1710 }
1711 1711
1712 /* 1712 /*
1713 * Initialize a CPU's per-CPU RCU data. Note that only one online or 1713 * Initialize a CPU's per-CPU RCU data. Note that only one online or
1714 * offline event can be happening at a given time. Note also that we 1714 * offline event can be happening at a given time. Note also that we
1715 * can accept some slop in the rsp->completed access due to the fact 1715 * can accept some slop in the rsp->completed access due to the fact
1716 * that this CPU cannot possibly have any RCU callbacks in flight yet. 1716 * that this CPU cannot possibly have any RCU callbacks in flight yet.
1717 */ 1717 */
1718 static void __cpuinit 1718 static void __cpuinit
1719 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) 1719 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1720 { 1720 {
1721 unsigned long flags; 1721 unsigned long flags;
1722 unsigned long mask; 1722 unsigned long mask;
1723 struct rcu_data *rdp = rsp->rda[cpu]; 1723 struct rcu_data *rdp = rsp->rda[cpu];
1724 struct rcu_node *rnp = rcu_get_root(rsp); 1724 struct rcu_node *rnp = rcu_get_root(rsp);
1725 1725
1726 /* Set up local state, ensuring consistent view of global state. */ 1726 /* Set up local state, ensuring consistent view of global state. */
1727 raw_spin_lock_irqsave(&rnp->lock, flags); 1727 raw_spin_lock_irqsave(&rnp->lock, flags);
1728 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 1728 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1729 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 1729 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1730 rdp->beenonline = 1; /* We have now been online. */ 1730 rdp->beenonline = 1; /* We have now been online. */
1731 rdp->preemptable = preemptable; 1731 rdp->preemptable = preemptable;
1732 rdp->qlen_last_fqs_check = 0; 1732 rdp->qlen_last_fqs_check = 0;
1733 rdp->n_force_qs_snap = rsp->n_force_qs; 1733 rdp->n_force_qs_snap = rsp->n_force_qs;
1734 rdp->blimit = blimit; 1734 rdp->blimit = blimit;
1735 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1735 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1736 1736
1737 /* 1737 /*
1738 * A new grace period might start here. If so, we won't be part 1738 * A new grace period might start here. If so, we won't be part
1739 * of it, but that is OK, as we are currently in a quiescent state. 1739 * of it, but that is OK, as we are currently in a quiescent state.
1740 */ 1740 */
1741 1741
1742 /* Exclude any attempts to start a new GP on large systems. */ 1742 /* Exclude any attempts to start a new GP on large systems. */
1743 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 1743 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
1744 1744
1745 /* Add CPU to rcu_node bitmasks. */ 1745 /* Add CPU to rcu_node bitmasks. */
1746 rnp = rdp->mynode; 1746 rnp = rdp->mynode;
1747 mask = rdp->grpmask; 1747 mask = rdp->grpmask;
1748 do { 1748 do {
1749 /* Exclude any attempts to start a new GP on small systems. */ 1749 /* Exclude any attempts to start a new GP on small systems. */
1750 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1750 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1751 rnp->qsmaskinit |= mask; 1751 rnp->qsmaskinit |= mask;
1752 mask = rnp->grpmask; 1752 mask = rnp->grpmask;
1753 if (rnp == rdp->mynode) { 1753 if (rnp == rdp->mynode) {
1754 rdp->gpnum = rnp->completed; /* if GP in progress... */ 1754 rdp->gpnum = rnp->completed; /* if GP in progress... */
1755 rdp->completed = rnp->completed; 1755 rdp->completed = rnp->completed;
1756 rdp->passed_quiesc_completed = rnp->completed - 1; 1756 rdp->passed_quiesc_completed = rnp->completed - 1;
1757 } 1757 }
1758 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 1758 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
1759 rnp = rnp->parent; 1759 rnp = rnp->parent;
1760 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 1760 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
1761 1761
1762 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 1762 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
1763 } 1763 }
1764 1764
1765 static void __cpuinit rcu_online_cpu(int cpu) 1765 static void __cpuinit rcu_online_cpu(int cpu)
1766 { 1766 {
1767 rcu_init_percpu_data(cpu, &rcu_sched_state, 0); 1767 rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
1768 rcu_init_percpu_data(cpu, &rcu_bh_state, 0); 1768 rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
1769 rcu_preempt_init_percpu_data(cpu); 1769 rcu_preempt_init_percpu_data(cpu);
1770 } 1770 }
1771 1771
1772 /* 1772 /*
1773 * Handle CPU online/offline notification events. 1773 * Handle CPU online/offline notification events.
1774 */ 1774 */
1775 static int __cpuinit rcu_cpu_notify(struct notifier_block *self, 1775 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1776 unsigned long action, void *hcpu) 1776 unsigned long action, void *hcpu)
1777 { 1777 {
1778 long cpu = (long)hcpu; 1778 long cpu = (long)hcpu;
1779 1779
1780 switch (action) { 1780 switch (action) {
1781 case CPU_UP_PREPARE: 1781 case CPU_UP_PREPARE:
1782 case CPU_UP_PREPARE_FROZEN: 1782 case CPU_UP_PREPARE_FROZEN:
1783 rcu_online_cpu(cpu); 1783 rcu_online_cpu(cpu);
1784 break; 1784 break;
1785 case CPU_DYING: 1785 case CPU_DYING:
1786 case CPU_DYING_FROZEN: 1786 case CPU_DYING_FROZEN:
1787 /* 1787 /*
1788 * preempt_disable() in _rcu_barrier() prevents stop_machine(), 1788 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
1789 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" 1789 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
1790 * returns, all online cpus have queued rcu_barrier_func(). 1790 * returns, all online cpus have queued rcu_barrier_func().
1791 * The dying CPU clears its cpu_online_mask bit and 1791 * The dying CPU clears its cpu_online_mask bit and
1792 * moves all of its RCU callbacks to ->orphan_cbs_list 1792 * moves all of its RCU callbacks to ->orphan_cbs_list
1793 * in the context of stop_machine(), so subsequent calls 1793 * in the context of stop_machine(), so subsequent calls
1794 * to _rcu_barrier() will adopt these callbacks and only 1794 * to _rcu_barrier() will adopt these callbacks and only
1795 * then queue rcu_barrier_func() on all remaining CPUs. 1795 * then queue rcu_barrier_func() on all remaining CPUs.
1796 */ 1796 */
1797 rcu_send_cbs_to_orphanage(&rcu_bh_state); 1797 rcu_send_cbs_to_orphanage(&rcu_bh_state);
1798 rcu_send_cbs_to_orphanage(&rcu_sched_state); 1798 rcu_send_cbs_to_orphanage(&rcu_sched_state);
1799 rcu_preempt_send_cbs_to_orphanage(); 1799 rcu_preempt_send_cbs_to_orphanage();
1800 break; 1800 break;
1801 case CPU_DEAD: 1801 case CPU_DEAD:
1802 case CPU_DEAD_FROZEN: 1802 case CPU_DEAD_FROZEN:
1803 case CPU_UP_CANCELED: 1803 case CPU_UP_CANCELED:
1804 case CPU_UP_CANCELED_FROZEN: 1804 case CPU_UP_CANCELED_FROZEN:
1805 rcu_offline_cpu(cpu); 1805 rcu_offline_cpu(cpu);
1806 break; 1806 break;
1807 default: 1807 default:
1808 break; 1808 break;
1809 } 1809 }
1810 return NOTIFY_OK; 1810 return NOTIFY_OK;
1811 } 1811 }
1812 1812
1813 /* 1813 /*
1814 * This function is invoked towards the end of the scheduler's initialization 1814 * This function is invoked towards the end of the scheduler's initialization
1815 * process. Before this is called, the idle task might contain 1815 * process. Before this is called, the idle task might contain
1816 * RCU read-side critical sections (during which time, this idle 1816 * RCU read-side critical sections (during which time, this idle
1817 * task is booting the system). After this function is called, the 1817 * task is booting the system). After this function is called, the
1818 * idle tasks are prohibited from containing RCU read-side critical 1818 * idle tasks are prohibited from containing RCU read-side critical
1819 * sections. This function also enables RCU lockdep checking. 1819 * sections. This function also enables RCU lockdep checking.
1820 */ 1820 */
1821 void rcu_scheduler_starting(void) 1821 void rcu_scheduler_starting(void)
1822 { 1822 {
1823 WARN_ON(num_online_cpus() != 1); 1823 WARN_ON(num_online_cpus() != 1);
1824 WARN_ON(nr_context_switches() > 0); 1824 WARN_ON(nr_context_switches() > 0);
1825 rcu_scheduler_active = 1; 1825 rcu_scheduler_active = 1;
1826 } 1826 }
1827 1827
1828 /* 1828 /*
1829 * Compute the per-level fanout, either using the exact fanout specified 1829 * Compute the per-level fanout, either using the exact fanout specified
1830 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 1830 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
1831 */ 1831 */
1832 #ifdef CONFIG_RCU_FANOUT_EXACT 1832 #ifdef CONFIG_RCU_FANOUT_EXACT
1833 static void __init rcu_init_levelspread(struct rcu_state *rsp) 1833 static void __init rcu_init_levelspread(struct rcu_state *rsp)
1834 { 1834 {
1835 int i; 1835 int i;
1836 1836
1837 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) 1837 for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
1838 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 1838 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
1839 } 1839 }
1840 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 1840 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
1841 static void __init rcu_init_levelspread(struct rcu_state *rsp) 1841 static void __init rcu_init_levelspread(struct rcu_state *rsp)
1842 { 1842 {
1843 int ccur; 1843 int ccur;
1844 int cprv; 1844 int cprv;
1845 int i; 1845 int i;
1846 1846
1847 cprv = NR_CPUS; 1847 cprv = NR_CPUS;
1848 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 1848 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
1849 ccur = rsp->levelcnt[i]; 1849 ccur = rsp->levelcnt[i];
1850 rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 1850 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
1851 cprv = ccur; 1851 cprv = ccur;
1852 } 1852 }
1853 } 1853 }
1854 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ 1854 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
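When CONFIG_RCU_FANOUT_EXACT is not set, the loop above walks from the leaf level toward the root, each time taking the ceiling of cprv/ccur so that CPUs, and then nodes, are spread as evenly as possible. A standalone sketch of the same arithmetic, using an assumed 128-CPU, two-level tree (one root over two leaves):

	#include <stdio.h>

	int main(void)
	{
		int levelcnt[2] = { 1, 2 };	/* nodes per level: root, then leaves (assumed) */
		int levelspread[2];
		int cprv = 128;			/* stands in for NR_CPUS */

		for (int i = 1; i >= 0; i--) {
			int ccur = levelcnt[i];

			levelspread[i] = (cprv + ccur - 1) / ccur;	/* ceiling division */
			cprv = ccur;
		}
		/* Prints "levelspread = { 2, 64 }": each leaf covers 64 CPUs,
		   and the root fans out over the two leaves. */
		printf("levelspread = { %d, %d }\n", levelspread[0], levelspread[1]);
		return 0;
	}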
1855 1855
1856 /* 1856 /*
1857 * Helper function for rcu_init() that initializes one rcu_state structure. 1857 * Helper function for rcu_init() that initializes one rcu_state structure.
1858 */ 1858 */
1859 static void __init rcu_init_one(struct rcu_state *rsp) 1859 static void __init rcu_init_one(struct rcu_state *rsp)
1860 { 1860 {
1861 static char *buf[] = { "rcu_node_level_0", 1861 static char *buf[] = { "rcu_node_level_0",
1862 "rcu_node_level_1", 1862 "rcu_node_level_1",
1863 "rcu_node_level_2", 1863 "rcu_node_level_2",
1864 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ 1864 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
1865 int cpustride = 1; 1865 int cpustride = 1;
1866 int i; 1866 int i;
1867 int j; 1867 int j;
1868 struct rcu_node *rnp; 1868 struct rcu_node *rnp;
1869 1869
1870 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 1870 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
1871 1871
1872 /* Initialize the level-tracking arrays. */ 1872 /* Initialize the level-tracking arrays. */
1873 1873
1874 for (i = 1; i < NUM_RCU_LVLS; i++) 1874 for (i = 1; i < NUM_RCU_LVLS; i++)
1875 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; 1875 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
1876 rcu_init_levelspread(rsp); 1876 rcu_init_levelspread(rsp);
1877 1877
1878 /* Initialize the elements themselves, starting from the leaves. */ 1878 /* Initialize the elements themselves, starting from the leaves. */
1879 1879
1880 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 1880 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
1881 cpustride *= rsp->levelspread[i]; 1881 cpustride *= rsp->levelspread[i];
1882 rnp = rsp->level[i]; 1882 rnp = rsp->level[i];
1883 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 1883 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
1884 raw_spin_lock_init(&rnp->lock); 1884 raw_spin_lock_init(&rnp->lock);
1885 lockdep_set_class_and_name(&rnp->lock, 1885 lockdep_set_class_and_name(&rnp->lock,
1886 &rcu_node_class[i], buf[i]); 1886 &rcu_node_class[i], buf[i]);
1887 rnp->gpnum = 0; 1887 rnp->gpnum = 0;
1888 rnp->qsmask = 0; 1888 rnp->qsmask = 0;
1889 rnp->qsmaskinit = 0; 1889 rnp->qsmaskinit = 0;
1890 rnp->grplo = j * cpustride; 1890 rnp->grplo = j * cpustride;
1891 rnp->grphi = (j + 1) * cpustride - 1; 1891 rnp->grphi = (j + 1) * cpustride - 1;
1892 if (rnp->grphi >= NR_CPUS) 1892 if (rnp->grphi >= NR_CPUS)
1893 rnp->grphi = NR_CPUS - 1; 1893 rnp->grphi = NR_CPUS - 1;
1894 if (i == 0) { 1894 if (i == 0) {
1895 rnp->grpnum = 0; 1895 rnp->grpnum = 0;
1896 rnp->grpmask = 0; 1896 rnp->grpmask = 0;
1897 rnp->parent = NULL; 1897 rnp->parent = NULL;
1898 } else { 1898 } else {
1899 rnp->grpnum = j % rsp->levelspread[i - 1]; 1899 rnp->grpnum = j % rsp->levelspread[i - 1];
1900 rnp->grpmask = 1UL << rnp->grpnum; 1900 rnp->grpmask = 1UL << rnp->grpnum;
1901 rnp->parent = rsp->level[i - 1] + 1901 rnp->parent = rsp->level[i - 1] +
1902 j / rsp->levelspread[i - 1]; 1902 j / rsp->levelspread[i - 1];
1903 } 1903 }
1904 rnp->level = i; 1904 rnp->level = i;
1905 INIT_LIST_HEAD(&rnp->blocked_tasks[0]); 1905 INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
1906 INIT_LIST_HEAD(&rnp->blocked_tasks[1]); 1906 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1907 INIT_LIST_HEAD(&rnp->blocked_tasks[2]); 1907 INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
1908 INIT_LIST_HEAD(&rnp->blocked_tasks[3]); 1908 INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
1909 } 1909 }
1910 } 1910 }
1911 1911
1912 rnp = rsp->level[NUM_RCU_LVLS - 1]; 1912 rnp = rsp->level[NUM_RCU_LVLS - 1];
1913 for_each_possible_cpu(i) { 1913 for_each_possible_cpu(i) {
1914 if (i > rnp->grphi) 1914 if (i > rnp->grphi)
1915 rnp++; 1915 rnp++;
1916 rsp->rda[i]->mynode = rnp; 1916 rsp->rda[i]->mynode = rnp;
1917 rcu_boot_init_percpu_data(i, rsp); 1917 rcu_boot_init_percpu_data(i, rsp);
1918 } 1918 }
1919 } 1919 }
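Continuing the assumed two-level, 128-CPU example, the loops above would lay the tree out roughly as follows (grplo/grphi come from j * cpustride and (j + 1) * cpustride - 1, with cpustride reaching 64 at the leaf level and 128 at the root):

	root rcu_node:    grplo =  0, grphi = 127, parent = NULL
	leaf rcu_node #0: grplo =  0, grphi =  63, parent = root, grpmask = 0x1
	leaf rcu_node #1: grplo = 64, grphi = 127, parent = root, grpmask = 0x2

The final for_each_possible_cpu() loop then points each CPU's ->mynode at the leaf whose [grplo, grphi] range contains it.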
1920 1920
1921 /* 1921 /*
1922 * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used 1922 * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
1923 * nowhere else! Assigns leaf node pointers into each CPU's rcu_data 1923 * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
1924 * structure. 1924 * structure.
1925 */ 1925 */
1926 #define RCU_INIT_FLAVOR(rsp, rcu_data) \ 1926 #define RCU_INIT_FLAVOR(rsp, rcu_data) \
1927 do { \ 1927 do { \
1928 int i; \ 1928 int i; \
1929 \ 1929 \
1930 for_each_possible_cpu(i) { \ 1930 for_each_possible_cpu(i) { \
1931 (rsp)->rda[i] = &per_cpu(rcu_data, i); \ 1931 (rsp)->rda[i] = &per_cpu(rcu_data, i); \
1932 } \ 1932 } \
1933 rcu_init_one(rsp); \ 1933 rcu_init_one(rsp); \
1934 } while (0) 1934 } while (0)
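RCU_INIT_FLAVOR() has to be a macro rather than a function because per_cpu() needs the per-CPU variable's identifier at compile time; a pointer to it cannot be handed in at run time. For one flavor, RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data) expands to roughly:

	do {
		int i;

		for_each_possible_cpu(i) {
			(&rcu_sched_state)->rda[i] = &per_cpu(rcu_sched_data, i);
		}
		rcu_init_one(&rcu_sched_state);
	} while (0)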
1935 1935
1936 void __init rcu_init(void) 1936 void __init rcu_init(void)
1937 { 1937 {
1938 int cpu; 1938 int cpu;
1939 1939
1940 rcu_bootup_announce(); 1940 rcu_bootup_announce();
1941 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1942 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1943 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
1944 #if NUM_RCU_LVL_4 != 0
1945 printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n");
1946 #endif /* #if NUM_RCU_LVL_4 != 0 */
1947 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); 1941 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
1948 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); 1942 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
1949 __rcu_init_preempt(); 1943 __rcu_init_preempt();
1950 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1944 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1951 1945
1952 /* 1946 /*
1953 * We don't need protection against CPU-hotplug here because 1947 * We don't need protection against CPU-hotplug here because
1954 * this is called early in boot, before either interrupts 1948 * this is called early in boot, before either interrupts
1955 * or the scheduler are operational. 1949 * or the scheduler are operational.
1956 */ 1950 */
1957 cpu_notifier(rcu_cpu_notify, 0); 1951 cpu_notifier(rcu_cpu_notify, 0);
1958 for_each_online_cpu(cpu) 1952 for_each_online_cpu(cpu)
1959 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 1953 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
1960 check_cpu_stall_init(); 1954 check_cpu_stall_init();
1961 } 1955 }
1962 1956
1963 #include "rcutree_plugin.h" 1957 #include "rcutree_plugin.h"
1964 1958
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptable semantics. 4 * or preemptable semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 28
29 /*
30 * Check the RCU kernel configuration parameters and print informative
31 * messages about anything out of the ordinary. If you like #ifdef, you
32 * will love this function.
33 */
34 static void __init rcu_bootup_announce_oddness(void)
35 {
36 #ifdef CONFIG_RCU_TRACE
37 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
38 #endif
39 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
40 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
41 CONFIG_RCU_FANOUT);
42 #endif
43 #ifdef CONFIG_RCU_FANOUT_EXACT
44 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
45 #endif
46 #ifdef CONFIG_RCU_FAST_NO_HZ
47 printk(KERN_INFO
48 "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
49 #endif
50 #ifdef CONFIG_PROVE_RCU
51 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
52 #endif
53 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
54 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
55 #endif
56 #ifndef CONFIG_RCU_CPU_STALL_DETECTOR
57 printk(KERN_INFO
58 "\tRCU-based detection of stalled CPUs is disabled.\n");
59 #endif
60 #ifndef CONFIG_RCU_CPU_STALL_VERBOSE
61 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
62 #endif
63 #if NUM_RCU_LVL_4 != 0
64 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
65 #endif
66 }
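For illustration only, a TREE_PREEMPT_RCU kernel built 64-bit with CONFIG_RCU_TRACE=y and CONFIG_RCU_FANOUT=16 (an assumed configuration) would now print something like the following at boot, the indented lines coming from the function above:

	Preemptable hierarchical RCU implementation.
		RCU debugfs-based tracing is enabled.
		CONFIG_RCU_FANOUT set to non-default value of 16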
67
29 #ifdef CONFIG_TREE_PREEMPT_RCU 68 #ifdef CONFIG_TREE_PREEMPT_RCU
30 69
31 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 70 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
32 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 71 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
33 72
34 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 73 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
35 74
36 /* 75 /*
37 * Tell them what RCU they are running. 76 * Tell them what RCU they are running.
38 */ 77 */
39 static void __init rcu_bootup_announce(void) 78 static void __init rcu_bootup_announce(void)
40 { 79 {
41 printk(KERN_INFO 80 printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
42 "Experimental preemptable hierarchical RCU implementation.\n"); 81 rcu_bootup_announce_oddness();
43 } 82 }
44 83
45 /* 84 /*
46 * Return the number of RCU-preempt batches processed thus far 85 * Return the number of RCU-preempt batches processed thus far
47 * for debug and statistics. 86 * for debug and statistics.
48 */ 87 */
49 long rcu_batches_completed_preempt(void) 88 long rcu_batches_completed_preempt(void)
50 { 89 {
51 return rcu_preempt_state.completed; 90 return rcu_preempt_state.completed;
52 } 91 }
53 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 92 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
54 93
55 /* 94 /*
56 * Return the number of RCU batches processed thus far for debug & stats. 95 * Return the number of RCU batches processed thus far for debug & stats.
57 */ 96 */
58 long rcu_batches_completed(void) 97 long rcu_batches_completed(void)
59 { 98 {
60 return rcu_batches_completed_preempt(); 99 return rcu_batches_completed_preempt();
61 } 100 }
62 EXPORT_SYMBOL_GPL(rcu_batches_completed); 101 EXPORT_SYMBOL_GPL(rcu_batches_completed);
63 102
64 /* 103 /*
65 * Force a quiescent state for preemptible RCU. 104 * Force a quiescent state for preemptible RCU.
66 */ 105 */
67 void rcu_force_quiescent_state(void) 106 void rcu_force_quiescent_state(void)
68 { 107 {
69 force_quiescent_state(&rcu_preempt_state, 0); 108 force_quiescent_state(&rcu_preempt_state, 0);
70 } 109 }
71 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 110 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
72 111
73 /* 112 /*
74 * Record a preemptable-RCU quiescent state for the specified CPU. Note 113 * Record a preemptable-RCU quiescent state for the specified CPU. Note
75 * that this just means that the task currently running on the CPU is 114 * that this just means that the task currently running on the CPU is
76 * not in a quiescent state. There might be any number of tasks blocked 115 * not in a quiescent state. There might be any number of tasks blocked
77 * while in an RCU read-side critical section. 116 * while in an RCU read-side critical section.
78 * 117 *
79 * Unlike the other rcu_*_qs() functions, callers to this function 118 * Unlike the other rcu_*_qs() functions, callers to this function
80 * must disable irqs in order to protect the assignment to 119 * must disable irqs in order to protect the assignment to
81 * ->rcu_read_unlock_special. 120 * ->rcu_read_unlock_special.
82 */ 121 */
83 static void rcu_preempt_qs(int cpu) 122 static void rcu_preempt_qs(int cpu)
84 { 123 {
85 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 124 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
86 125
87 rdp->passed_quiesc_completed = rdp->gpnum - 1; 126 rdp->passed_quiesc_completed = rdp->gpnum - 1;
88 barrier(); 127 barrier();
89 rdp->passed_quiesc = 1; 128 rdp->passed_quiesc = 1;
90 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 129 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
91 } 130 }
92 131
93 /* 132 /*
94 * We have entered the scheduler, and the current task might soon be 133 * We have entered the scheduler, and the current task might soon be
95 * context-switched away from. If this task is in an RCU read-side 134 * context-switched away from. If this task is in an RCU read-side
96 * critical section, we will no longer be able to rely on the CPU to 135 * critical section, we will no longer be able to rely on the CPU to
97 * record that fact, so we enqueue the task on the appropriate entry 136 * record that fact, so we enqueue the task on the appropriate entry
98 * of the blocked_tasks[] array. The task will dequeue itself when 137 * of the blocked_tasks[] array. The task will dequeue itself when
99 * it exits the outermost enclosing RCU read-side critical section. 138 * it exits the outermost enclosing RCU read-side critical section.
100 * Therefore, the current grace period cannot be permitted to complete 139 * Therefore, the current grace period cannot be permitted to complete
101 * until the blocked_tasks[] entry indexed by the low-order bit of 140 * until the blocked_tasks[] entry indexed by the low-order bit of
102 * rnp->gpnum empties. 141 * rnp->gpnum empties.
103 * 142 *
104 * Caller must disable preemption. 143 * Caller must disable preemption.
105 */ 144 */
106 static void rcu_preempt_note_context_switch(int cpu) 145 static void rcu_preempt_note_context_switch(int cpu)
107 { 146 {
108 struct task_struct *t = current; 147 struct task_struct *t = current;
109 unsigned long flags; 148 unsigned long flags;
110 int phase; 149 int phase;
111 struct rcu_data *rdp; 150 struct rcu_data *rdp;
112 struct rcu_node *rnp; 151 struct rcu_node *rnp;
113 152
114 if (t->rcu_read_lock_nesting && 153 if (t->rcu_read_lock_nesting &&
115 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 154 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
116 155
117 /* Possibly blocking in an RCU read-side critical section. */ 156 /* Possibly blocking in an RCU read-side critical section. */
118 rdp = rcu_preempt_state.rda[cpu]; 157 rdp = rcu_preempt_state.rda[cpu];
119 rnp = rdp->mynode; 158 rnp = rdp->mynode;
120 raw_spin_lock_irqsave(&rnp->lock, flags); 159 raw_spin_lock_irqsave(&rnp->lock, flags);
121 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 160 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
122 t->rcu_blocked_node = rnp; 161 t->rcu_blocked_node = rnp;
123 162
124 /* 163 /*
125 * If this CPU has already checked in, then this task 164 * If this CPU has already checked in, then this task
126 * will hold up the next grace period rather than the 165 * will hold up the next grace period rather than the
127 * current grace period. Queue the task accordingly. 166 * current grace period. Queue the task accordingly.
128 * If the task is queued for the current grace period 167 * If the task is queued for the current grace period
129 * (i.e., this CPU has not yet passed through a quiescent 168 * (i.e., this CPU has not yet passed through a quiescent
130 * state for the current grace period), then as long 169 * state for the current grace period), then as long
131 * as that task remains queued, the current grace period 170 * as that task remains queued, the current grace period
132 * cannot end. 171 * cannot end.
133 * 172 *
134 * But first, note that the current CPU must still be 173 * But first, note that the current CPU must still be
135 * on line! 174 * on line!
136 */ 175 */
137 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 176 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
138 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 177 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
139 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; 178 phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
140 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); 179 list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
141 raw_spin_unlock_irqrestore(&rnp->lock, flags); 180 raw_spin_unlock_irqrestore(&rnp->lock, flags);
142 } 181 }
143 182
144 /* 183 /*
145 * Either we were not in an RCU read-side critical section to 184 * Either we were not in an RCU read-side critical section to
146 * begin with, or we have now recorded that critical section 185 * begin with, or we have now recorded that critical section
147 * globally. Either way, we can now note a quiescent state 186 * globally. Either way, we can now note a quiescent state
148 * for this CPU. Again, if we were in an RCU read-side critical 187 * for this CPU. Again, if we were in an RCU read-side critical
149 * section, and if that critical section was blocking the current 188 * section, and if that critical section was blocking the current
150 * grace period, then the fact that the task has been enqueued 189 * grace period, then the fact that the task has been enqueued
151 * means that we continue to block the current grace period. 190 * means that we continue to block the current grace period.
152 */ 191 */
153 local_irq_save(flags); 192 local_irq_save(flags);
154 rcu_preempt_qs(cpu); 193 rcu_preempt_qs(cpu);
155 local_irq_restore(flags); 194 local_irq_restore(flags);
156 } 195 }
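The phase expression above is the subtle part: it picks the ->blocked_tasks[] list whose grace period this task actually blocks. A brief walkthrough with assumed values:

	/*
	 * Suppose rnp->gpnum == 8, so the current grace period uses list 0:
	 *
	 *  - CPU's bit still set in rnp->qsmask (no quiescent state yet):
	 *        phase = (8 + !1) & 0x1 = 0  ->  blocked_tasks[0],
	 *    so the task blocks the current grace period.
	 *
	 *  - CPU's bit already clear (quiescent state already reported):
	 *        phase = (8 + !0) & 0x1 = 1  ->  blocked_tasks[1],
	 *    so the task can only block the next grace period.
	 */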
157 196
158 /* 197 /*
159 * Tree-preemptable RCU implementation for rcu_read_lock(). 198 * Tree-preemptable RCU implementation for rcu_read_lock().
160 * Just increment ->rcu_read_lock_nesting, shared state will be updated 199 * Just increment ->rcu_read_lock_nesting, shared state will be updated
161 * if we block. 200 * if we block.
162 */ 201 */
163 void __rcu_read_lock(void) 202 void __rcu_read_lock(void)
164 { 203 {
165 ACCESS_ONCE(current->rcu_read_lock_nesting)++; 204 ACCESS_ONCE(current->rcu_read_lock_nesting)++;
166 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ 205 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
167 } 206 }
168 EXPORT_SYMBOL_GPL(__rcu_read_lock); 207 EXPORT_SYMBOL_GPL(__rcu_read_lock);
169 208
170 /* 209 /*
171 * Check for preempted RCU readers blocking the current grace period 210 * Check for preempted RCU readers blocking the current grace period
172 * for the specified rcu_node structure. If the caller needs a reliable 211 * for the specified rcu_node structure. If the caller needs a reliable
173 * answer, it must hold the rcu_node's ->lock. 212 * answer, it must hold the rcu_node's ->lock.
174 */ 213 */
175 static int rcu_preempted_readers(struct rcu_node *rnp) 214 static int rcu_preempted_readers(struct rcu_node *rnp)
176 { 215 {
177 int phase = rnp->gpnum & 0x1; 216 int phase = rnp->gpnum & 0x1;
178 217
179 return !list_empty(&rnp->blocked_tasks[phase]) || 218 return !list_empty(&rnp->blocked_tasks[phase]) ||
180 !list_empty(&rnp->blocked_tasks[phase + 2]); 219 !list_empty(&rnp->blocked_tasks[phase + 2]);
181 } 220 }
182 221
183 /* 222 /*
184 * Record a quiescent state for all tasks that were previously queued 223 * Record a quiescent state for all tasks that were previously queued
185 * on the specified rcu_node structure and that were blocking the current 224 * on the specified rcu_node structure and that were blocking the current
186 * RCU grace period. The caller must hold the specified rnp->lock with 225 * RCU grace period. The caller must hold the specified rnp->lock with
187 * irqs disabled, and this lock is released upon return, but irqs remain 226 * irqs disabled, and this lock is released upon return, but irqs remain
188 * disabled. 227 * disabled.
189 */ 228 */
190 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 229 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
191 __releases(rnp->lock) 230 __releases(rnp->lock)
192 { 231 {
193 unsigned long mask; 232 unsigned long mask;
194 struct rcu_node *rnp_p; 233 struct rcu_node *rnp_p;
195 234
196 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 235 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
197 raw_spin_unlock_irqrestore(&rnp->lock, flags); 236 raw_spin_unlock_irqrestore(&rnp->lock, flags);
198 return; /* Still need more quiescent states! */ 237 return; /* Still need more quiescent states! */
199 } 238 }
200 239
201 rnp_p = rnp->parent; 240 rnp_p = rnp->parent;
202 if (rnp_p == NULL) { 241 if (rnp_p == NULL) {
203 /* 242 /*
204 * Either there is only one rcu_node in the tree, 243 * Either there is only one rcu_node in the tree,
205 * or tasks were kicked up to root rcu_node due to 244 * or tasks were kicked up to root rcu_node due to
206 * CPUs going offline. 245 * CPUs going offline.
207 */ 246 */
208 rcu_report_qs_rsp(&rcu_preempt_state, flags); 247 rcu_report_qs_rsp(&rcu_preempt_state, flags);
209 return; 248 return;
210 } 249 }
211 250
212 /* Report up the rest of the hierarchy. */ 251 /* Report up the rest of the hierarchy. */
213 mask = rnp->grpmask; 252 mask = rnp->grpmask;
214 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 253 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
215 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 254 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
216 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 255 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
217 } 256 }
218 257
219 /* 258 /*
220 * Handle special cases during rcu_read_unlock(), such as needing to 259 * Handle special cases during rcu_read_unlock(), such as needing to
221 * notify RCU core processing or task having blocked during the RCU 260 * notify RCU core processing or task having blocked during the RCU
222 * read-side critical section. 261 * read-side critical section.
223 */ 262 */
224 static void rcu_read_unlock_special(struct task_struct *t) 263 static void rcu_read_unlock_special(struct task_struct *t)
225 { 264 {
226 int empty; 265 int empty;
227 int empty_exp; 266 int empty_exp;
228 unsigned long flags; 267 unsigned long flags;
229 struct rcu_node *rnp; 268 struct rcu_node *rnp;
230 int special; 269 int special;
231 270
232 /* NMI handlers cannot block and cannot safely manipulate state. */ 271 /* NMI handlers cannot block and cannot safely manipulate state. */
233 if (in_nmi()) 272 if (in_nmi())
234 return; 273 return;
235 274
236 local_irq_save(flags); 275 local_irq_save(flags);
237 276
238 /* 277 /*
239 * If RCU core is waiting for this CPU to exit critical section, 278 * If RCU core is waiting for this CPU to exit critical section,
240 * let it know that we have done so. 279 * let it know that we have done so.
241 */ 280 */
242 special = t->rcu_read_unlock_special; 281 special = t->rcu_read_unlock_special;
243 if (special & RCU_READ_UNLOCK_NEED_QS) { 282 if (special & RCU_READ_UNLOCK_NEED_QS) {
244 rcu_preempt_qs(smp_processor_id()); 283 rcu_preempt_qs(smp_processor_id());
245 } 284 }
246 285
247 /* Hardware IRQ handlers cannot block. */ 286 /* Hardware IRQ handlers cannot block. */
248 if (in_irq()) { 287 if (in_irq()) {
249 local_irq_restore(flags); 288 local_irq_restore(flags);
250 return; 289 return;
251 } 290 }
252 291
253 /* Clean up if blocked during RCU read-side critical section. */ 292 /* Clean up if blocked during RCU read-side critical section. */
254 if (special & RCU_READ_UNLOCK_BLOCKED) { 293 if (special & RCU_READ_UNLOCK_BLOCKED) {
255 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 294 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
256 295
257 /* 296 /*
258 * Remove this task from the list it blocked on. The 297 * Remove this task from the list it blocked on. The
259 * task can migrate while we acquire the lock, but at 298 * task can migrate while we acquire the lock, but at
260 * most one time. So at most two passes through loop. 299 * most one time. So at most two passes through loop.
261 */ 300 */
262 for (;;) { 301 for (;;) {
263 rnp = t->rcu_blocked_node; 302 rnp = t->rcu_blocked_node;
264 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 303 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
265 if (rnp == t->rcu_blocked_node) 304 if (rnp == t->rcu_blocked_node)
266 break; 305 break;
267 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 306 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
268 } 307 }
269 empty = !rcu_preempted_readers(rnp); 308 empty = !rcu_preempted_readers(rnp);
270 empty_exp = !rcu_preempted_readers_exp(rnp); 309 empty_exp = !rcu_preempted_readers_exp(rnp);
271 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 310 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
272 list_del_init(&t->rcu_node_entry); 311 list_del_init(&t->rcu_node_entry);
273 t->rcu_blocked_node = NULL; 312 t->rcu_blocked_node = NULL;
274 313
275 /* 314 /*
276 * If this was the last task on the current list, and if 315 * If this was the last task on the current list, and if
277 * we aren't waiting on any CPUs, report the quiescent state. 316 * we aren't waiting on any CPUs, report the quiescent state.
278 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. 317 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
279 */ 318 */
280 if (empty) 319 if (empty)
281 raw_spin_unlock_irqrestore(&rnp->lock, flags); 320 raw_spin_unlock_irqrestore(&rnp->lock, flags);
282 else 321 else
283 rcu_report_unblock_qs_rnp(rnp, flags); 322 rcu_report_unblock_qs_rnp(rnp, flags);
284 323
285 /* 324 /*
286 * If this was the last task on the expedited lists, 325 * If this was the last task on the expedited lists,
287 * then we need to report up the rcu_node hierarchy. 326 * then we need to report up the rcu_node hierarchy.
288 */ 327 */
289 if (!empty_exp && !rcu_preempted_readers_exp(rnp)) 328 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
290 rcu_report_exp_rnp(&rcu_preempt_state, rnp); 329 rcu_report_exp_rnp(&rcu_preempt_state, rnp);
291 } else { 330 } else {
292 local_irq_restore(flags); 331 local_irq_restore(flags);
293 } 332 }
294 } 333 }
295 334
296 /* 335 /*
297 * Tree-preemptable RCU implementation for rcu_read_unlock(). 336 * Tree-preemptable RCU implementation for rcu_read_unlock().
298 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 337 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
299 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 338 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
300 * invoke rcu_read_unlock_special() to clean up after a context switch 339 * invoke rcu_read_unlock_special() to clean up after a context switch
301 * in an RCU read-side critical section and other special cases. 340 * in an RCU read-side critical section and other special cases.
302 */ 341 */
303 void __rcu_read_unlock(void) 342 void __rcu_read_unlock(void)
304 { 343 {
305 struct task_struct *t = current; 344 struct task_struct *t = current;
306 345
307 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ 346 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
308 if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && 347 if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
309 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 348 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
310 rcu_read_unlock_special(t); 349 rcu_read_unlock_special(t);
311 #ifdef CONFIG_PROVE_LOCKING 350 #ifdef CONFIG_PROVE_LOCKING
312 WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); 351 WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
313 #endif /* #ifdef CONFIG_PROVE_LOCKING */ 352 #endif /* #ifdef CONFIG_PROVE_LOCKING */
314 } 353 }
315 EXPORT_SYMBOL_GPL(__rcu_read_unlock); 354 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
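__rcu_read_lock() and __rcu_read_unlock() are the preemptible-RCU implementations sitting behind rcu_read_lock() and rcu_read_unlock(). A minimal reader sketch, where struct foo, gp, and do_something_with() are invented for the example:

	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(gp);	/* gp is an RCU-protected pointer (assumed) */
	if (p)
		do_something_with(p->a);
	rcu_read_unlock();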
316 355
317 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 356 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
318 357
319 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 358 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
320 359
321 /* 360 /*
322 * Dump detailed information for all tasks blocking the current RCU 361 * Dump detailed information for all tasks blocking the current RCU
323 * grace period on the specified rcu_node structure. 362 * grace period on the specified rcu_node structure.
324 */ 363 */
325 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 364 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
326 { 365 {
327 unsigned long flags; 366 unsigned long flags;
328 struct list_head *lp; 367 struct list_head *lp;
329 int phase; 368 int phase;
330 struct task_struct *t; 369 struct task_struct *t;
331 370
332 if (rcu_preempted_readers(rnp)) { 371 if (rcu_preempted_readers(rnp)) {
333 raw_spin_lock_irqsave(&rnp->lock, flags); 372 raw_spin_lock_irqsave(&rnp->lock, flags);
334 phase = rnp->gpnum & 0x1; 373 phase = rnp->gpnum & 0x1;
335 lp = &rnp->blocked_tasks[phase]; 374 lp = &rnp->blocked_tasks[phase];
336 list_for_each_entry(t, lp, rcu_node_entry) 375 list_for_each_entry(t, lp, rcu_node_entry)
337 sched_show_task(t); 376 sched_show_task(t);
338 raw_spin_unlock_irqrestore(&rnp->lock, flags); 377 raw_spin_unlock_irqrestore(&rnp->lock, flags);
339 } 378 }
340 } 379 }
341 380
342 /* 381 /*
343 * Dump detailed information for all tasks blocking the current RCU 382 * Dump detailed information for all tasks blocking the current RCU
344 * grace period. 383 * grace period.
345 */ 384 */
346 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 385 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
347 { 386 {
348 struct rcu_node *rnp = rcu_get_root(rsp); 387 struct rcu_node *rnp = rcu_get_root(rsp);
349 388
350 rcu_print_detail_task_stall_rnp(rnp); 389 rcu_print_detail_task_stall_rnp(rnp);
351 rcu_for_each_leaf_node(rsp, rnp) 390 rcu_for_each_leaf_node(rsp, rnp)
352 rcu_print_detail_task_stall_rnp(rnp); 391 rcu_print_detail_task_stall_rnp(rnp);
353 } 392 }
354 393
355 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 394 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
356 395
357 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 396 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
358 { 397 {
359 } 398 }
360 399
361 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 400 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
362 401
363 /* 402 /*
364 * Scan the current list of tasks blocked within RCU read-side critical 403 * Scan the current list of tasks blocked within RCU read-side critical
365 * sections, printing out the tid of each. 404 * sections, printing out the tid of each.
366 */ 405 */
367 static void rcu_print_task_stall(struct rcu_node *rnp) 406 static void rcu_print_task_stall(struct rcu_node *rnp)
368 { 407 {
369 struct list_head *lp; 408 struct list_head *lp;
370 int phase; 409 int phase;
371 struct task_struct *t; 410 struct task_struct *t;
372 411
373 if (rcu_preempted_readers(rnp)) { 412 if (rcu_preempted_readers(rnp)) {
374 phase = rnp->gpnum & 0x1; 413 phase = rnp->gpnum & 0x1;
375 lp = &rnp->blocked_tasks[phase]; 414 lp = &rnp->blocked_tasks[phase];
376 list_for_each_entry(t, lp, rcu_node_entry) 415 list_for_each_entry(t, lp, rcu_node_entry)
377 printk(" P%d", t->pid); 416 printk(" P%d", t->pid);
378 } 417 }
379 } 418 }
380 419
381 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 420 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
382 421
383 /* 422 /*
384 * Check that the list of blocked tasks for the newly completed grace 423 * Check that the list of blocked tasks for the newly completed grace
385 * period is in fact empty. It is a serious bug to complete a grace 424 * period is in fact empty. It is a serious bug to complete a grace
386 * period that still has RCU readers blocked! This function must be 425 * period that still has RCU readers blocked! This function must be
387 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 426 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
388 * must be held by the caller. 427 * must be held by the caller.
389 */ 428 */
390 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 429 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
391 { 430 {
392 WARN_ON_ONCE(rcu_preempted_readers(rnp)); 431 WARN_ON_ONCE(rcu_preempted_readers(rnp));
393 WARN_ON_ONCE(rnp->qsmask); 432 WARN_ON_ONCE(rnp->qsmask);
394 } 433 }
395 434
396 #ifdef CONFIG_HOTPLUG_CPU 435 #ifdef CONFIG_HOTPLUG_CPU
397 436
398 /* 437 /*
399 * Handle tasklist migration for case in which all CPUs covered by the 438 * Handle tasklist migration for case in which all CPUs covered by the
400 * specified rcu_node have gone offline. Move them up to the root 439 * specified rcu_node have gone offline. Move them up to the root
401 * rcu_node. The reason for not just moving them to the immediate 440 * rcu_node. The reason for not just moving them to the immediate
402 * parent is to remove the need for rcu_read_unlock_special() to 441 * parent is to remove the need for rcu_read_unlock_special() to
403 * make more than two attempts to acquire the target rcu_node's lock. 442 * make more than two attempts to acquire the target rcu_node's lock.
404 * Returns true if there were tasks blocking the current RCU grace 443 * Returns true if there were tasks blocking the current RCU grace
405 * period. 444 * period.
406 * 445 *
407 * Returns 1 if there was previously a task blocking the current grace 446 * Returns 1 if there was previously a task blocking the current grace
408 * period on the specified rcu_node structure. 447 * period on the specified rcu_node structure.
409 * 448 *
410 * The caller must hold rnp->lock with irqs disabled. 449 * The caller must hold rnp->lock with irqs disabled.
411 */ 450 */
412 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 451 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
413 struct rcu_node *rnp, 452 struct rcu_node *rnp,
414 struct rcu_data *rdp) 453 struct rcu_data *rdp)
415 { 454 {
416 int i; 455 int i;
417 struct list_head *lp; 456 struct list_head *lp;
418 struct list_head *lp_root; 457 struct list_head *lp_root;
419 int retval = 0; 458 int retval = 0;
420 struct rcu_node *rnp_root = rcu_get_root(rsp); 459 struct rcu_node *rnp_root = rcu_get_root(rsp);
421 struct task_struct *tp; 460 struct task_struct *tp;
422 461
423 if (rnp == rnp_root) { 462 if (rnp == rnp_root) {
424 WARN_ONCE(1, "Last CPU thought to be offlined?"); 463 WARN_ONCE(1, "Last CPU thought to be offlined?");
425 return 0; /* Shouldn't happen: at least one CPU online. */ 464 return 0; /* Shouldn't happen: at least one CPU online. */
426 } 465 }
427 WARN_ON_ONCE(rnp != rdp->mynode && 466 WARN_ON_ONCE(rnp != rdp->mynode &&
428 (!list_empty(&rnp->blocked_tasks[0]) || 467 (!list_empty(&rnp->blocked_tasks[0]) ||
429 !list_empty(&rnp->blocked_tasks[1]) || 468 !list_empty(&rnp->blocked_tasks[1]) ||
430 !list_empty(&rnp->blocked_tasks[2]) || 469 !list_empty(&rnp->blocked_tasks[2]) ||
431 !list_empty(&rnp->blocked_tasks[3]))); 470 !list_empty(&rnp->blocked_tasks[3])));
432 471
433 /* 472 /*
434 * Move tasks up to root rcu_node. Rely on the fact that the 473 * Move tasks up to root rcu_node. Rely on the fact that the
435 * root rcu_node can be at most one ahead of the rest of the 474 * root rcu_node can be at most one ahead of the rest of the
436 * rcu_nodes in terms of gp_num value. This fact allows us to 475 * rcu_nodes in terms of gp_num value. This fact allows us to
437 * move the blocked_tasks[] array directly, element by element. 476 * move the blocked_tasks[] array directly, element by element.
438 */ 477 */
439 if (rcu_preempted_readers(rnp)) 478 if (rcu_preempted_readers(rnp))
440 retval |= RCU_OFL_TASKS_NORM_GP; 479 retval |= RCU_OFL_TASKS_NORM_GP;
441 if (rcu_preempted_readers_exp(rnp)) 480 if (rcu_preempted_readers_exp(rnp))
442 retval |= RCU_OFL_TASKS_EXP_GP; 481 retval |= RCU_OFL_TASKS_EXP_GP;
443 for (i = 0; i < 4; i++) { 482 for (i = 0; i < 4; i++) {
444 lp = &rnp->blocked_tasks[i]; 483 lp = &rnp->blocked_tasks[i];
445 lp_root = &rnp_root->blocked_tasks[i]; 484 lp_root = &rnp_root->blocked_tasks[i];
446 while (!list_empty(lp)) { 485 while (!list_empty(lp)) {
447 tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); 486 tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
448 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 487 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
449 list_del(&tp->rcu_node_entry); 488 list_del(&tp->rcu_node_entry);
450 tp->rcu_blocked_node = rnp_root; 489 tp->rcu_blocked_node = rnp_root;
451 list_add(&tp->rcu_node_entry, lp_root); 490 list_add(&tp->rcu_node_entry, lp_root);
452 raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ 491 raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
453 } 492 }
454 } 493 }
455 return retval; 494 return retval;
456 } 495 }
457 496
458 /* 497 /*
459 * Do CPU-offline processing for preemptable RCU. 498 * Do CPU-offline processing for preemptable RCU.
460 */ 499 */
461 static void rcu_preempt_offline_cpu(int cpu) 500 static void rcu_preempt_offline_cpu(int cpu)
462 { 501 {
463 __rcu_offline_cpu(cpu, &rcu_preempt_state); 502 __rcu_offline_cpu(cpu, &rcu_preempt_state);
464 } 503 }
465 504
466 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 505 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
467 506
468 /* 507 /*
469 * Check for a quiescent state from the current CPU. When a task blocks, 508 * Check for a quiescent state from the current CPU. When a task blocks,
470 * the task is recorded in the corresponding CPU's rcu_node structure, 509 * the task is recorded in the corresponding CPU's rcu_node structure,
471 * which is checked elsewhere. 510 * which is checked elsewhere.
472 * 511 *
473 * Caller must disable hard irqs. 512 * Caller must disable hard irqs.
474 */ 513 */
475 static void rcu_preempt_check_callbacks(int cpu) 514 static void rcu_preempt_check_callbacks(int cpu)
476 { 515 {
477 struct task_struct *t = current; 516 struct task_struct *t = current;
478 517
479 if (t->rcu_read_lock_nesting == 0) { 518 if (t->rcu_read_lock_nesting == 0) {
480 rcu_preempt_qs(cpu); 519 rcu_preempt_qs(cpu);
481 return; 520 return;
482 } 521 }
483 if (per_cpu(rcu_preempt_data, cpu).qs_pending) 522 if (per_cpu(rcu_preempt_data, cpu).qs_pending)
484 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 523 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
485 } 524 }
486 525
487 /* 526 /*
488 * Process callbacks for preemptable RCU. 527 * Process callbacks for preemptable RCU.
489 */ 528 */
490 static void rcu_preempt_process_callbacks(void) 529 static void rcu_preempt_process_callbacks(void)
491 { 530 {
492 __rcu_process_callbacks(&rcu_preempt_state, 531 __rcu_process_callbacks(&rcu_preempt_state,
493 &__get_cpu_var(rcu_preempt_data)); 532 &__get_cpu_var(rcu_preempt_data));
494 } 533 }
495 534
496 /* 535 /*
497 * Queue a preemptable-RCU callback for invocation after a grace period. 536 * Queue a preemptable-RCU callback for invocation after a grace period.
498 */ 537 */
499 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 538 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
500 { 539 {
501 __call_rcu(head, func, &rcu_preempt_state); 540 __call_rcu(head, func, &rcu_preempt_state);
502 } 541 }
503 EXPORT_SYMBOL_GPL(call_rcu); 542 EXPORT_SYMBOL_GPL(call_rcu);
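A typical asynchronous user embeds an rcu_head in its structure and lets the callback free it once a grace period has elapsed. The names below (struct foo, foo_reclaim(), old_fp) are illustrative, not from this patch:

	struct foo {
		int a;
		struct rcu_head rcu;
	};

	static void foo_reclaim(struct rcu_head *head)
	{
		struct foo *fp = container_of(head, struct foo, rcu);

		kfree(fp);
	}

	/* Updater, after unlinking old_fp from every RCU-visible path: */
	call_rcu(&old_fp->rcu, foo_reclaim);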
504 543
505 /** 544 /**
506 * synchronize_rcu - wait until a grace period has elapsed. 545 * synchronize_rcu - wait until a grace period has elapsed.
507 * 546 *
508 * Control will return to the caller some time after a full grace 547 * Control will return to the caller some time after a full grace
509 * period has elapsed, in other words after all currently executing RCU 548 * period has elapsed, in other words after all currently executing RCU
510 * read-side critical sections have completed. RCU read-side critical 549 * read-side critical sections have completed. RCU read-side critical
511 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 550 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
512 * and may be nested. 551 * and may be nested.
513 */ 552 */
514 void synchronize_rcu(void) 553 void synchronize_rcu(void)
515 { 554 {
516 struct rcu_synchronize rcu; 555 struct rcu_synchronize rcu;
517 556
518 if (!rcu_scheduler_active) 557 if (!rcu_scheduler_active)
519 return; 558 return;
520 559
521 init_completion(&rcu.completion); 560 init_completion(&rcu.completion);
522 /* Will wake me after RCU finished. */ 561 /* Will wake me after RCU finished. */
523 call_rcu(&rcu.head, wakeme_after_rcu); 562 call_rcu(&rcu.head, wakeme_after_rcu);
524 /* Wait for it. */ 563 /* Wait for it. */
525 wait_for_completion(&rcu.completion); 564 wait_for_completion(&rcu.completion);
526 } 565 }
527 EXPORT_SYMBOL_GPL(synchronize_rcu); 566 EXPORT_SYMBOL_GPL(synchronize_rcu);
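The synchronous counterpart suits updaters that can afford to block. A common pattern, with gp, gp_lock, new_fp, and old_fp as assumed names:

	spin_lock(&gp_lock);
	old_fp = gp;
	rcu_assign_pointer(gp, new_fp);
	spin_unlock(&gp_lock);
	synchronize_rcu();	/* Wait for pre-existing readers to drop old_fp. */
	kfree(old_fp);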
528 567
529 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 568 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
530 static long sync_rcu_preempt_exp_count; 569 static long sync_rcu_preempt_exp_count;
531 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 570 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
532 571
533 /* 572 /*
534 * Return non-zero if there are any tasks in RCU read-side critical 573 * Return non-zero if there are any tasks in RCU read-side critical
535 * sections blocking the current preemptible-RCU expedited grace period. 574 * sections blocking the current preemptible-RCU expedited grace period.
536 * If there is no preemptible-RCU expedited grace period currently in 575 * If there is no preemptible-RCU expedited grace period currently in
537 * progress, returns zero unconditionally. 576 * progress, returns zero unconditionally.
538 */ 577 */
539 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 578 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
540 { 579 {
541 return !list_empty(&rnp->blocked_tasks[2]) || 580 return !list_empty(&rnp->blocked_tasks[2]) ||
542 !list_empty(&rnp->blocked_tasks[3]); 581 !list_empty(&rnp->blocked_tasks[3]);
543 } 582 }
544 583
545 /* 584 /*
546 * return non-zero if there is no RCU expedited grace period in progress 585 * return non-zero if there is no RCU expedited grace period in progress
547 * for the specified rcu_node structure, in other words, if all CPUs and 586 * for the specified rcu_node structure, in other words, if all CPUs and
548 * tasks covered by the specified rcu_node structure have done their bit 587 * tasks covered by the specified rcu_node structure have done their bit
549 * for the current expedited grace period. Works only for preemptible 588 * for the current expedited grace period. Works only for preemptible
 550 * RCU -- other RCU implementations use other means. 589 * RCU -- other RCU implementations use other means.
551 * 590 *
552 * Caller must hold sync_rcu_preempt_exp_mutex. 591 * Caller must hold sync_rcu_preempt_exp_mutex.
553 */ 592 */
554 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 593 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
555 { 594 {
556 return !rcu_preempted_readers_exp(rnp) && 595 return !rcu_preempted_readers_exp(rnp) &&
557 ACCESS_ONCE(rnp->expmask) == 0; 596 ACCESS_ONCE(rnp->expmask) == 0;
558 } 597 }
559 598
560 /* 599 /*
561 * Report the exit from RCU read-side critical section for the last task 600 * Report the exit from RCU read-side critical section for the last task
562 * that queued itself during or before the current expedited preemptible-RCU 601 * that queued itself during or before the current expedited preemptible-RCU
563 * grace period. This event is reported either to the rcu_node structure on 602 * grace period. This event is reported either to the rcu_node structure on
564 * which the task was queued or to one of that rcu_node structure's ancestors, 603 * which the task was queued or to one of that rcu_node structure's ancestors,
565 * recursively up the tree. (Calm down, calm down, we do the recursion 604 * recursively up the tree. (Calm down, calm down, we do the recursion
566 * iteratively!) 605 * iteratively!)
567 * 606 *
568 * Caller must hold sync_rcu_preempt_exp_mutex. 607 * Caller must hold sync_rcu_preempt_exp_mutex.
569 */ 608 */
570 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 609 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
571 { 610 {
572 unsigned long flags; 611 unsigned long flags;
573 unsigned long mask; 612 unsigned long mask;
574 613
575 raw_spin_lock_irqsave(&rnp->lock, flags); 614 raw_spin_lock_irqsave(&rnp->lock, flags);
576 for (;;) { 615 for (;;) {
577 if (!sync_rcu_preempt_exp_done(rnp)) 616 if (!sync_rcu_preempt_exp_done(rnp))
578 break; 617 break;
579 if (rnp->parent == NULL) { 618 if (rnp->parent == NULL) {
580 wake_up(&sync_rcu_preempt_exp_wq); 619 wake_up(&sync_rcu_preempt_exp_wq);
581 break; 620 break;
582 } 621 }
583 mask = rnp->grpmask; 622 mask = rnp->grpmask;
584 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 623 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
585 rnp = rnp->parent; 624 rnp = rnp->parent;
586 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 625 raw_spin_lock(&rnp->lock); /* irqs already disabled */
587 rnp->expmask &= ~mask; 626 rnp->expmask &= ~mask;
588 } 627 }
589 raw_spin_unlock_irqrestore(&rnp->lock, flags); 628 raw_spin_unlock_irqrestore(&rnp->lock, flags);
590 } 629 }
591 630
592 /* 631 /*
593 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 632 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
594 * grace period for the specified rcu_node structure. If there are no such 633 * grace period for the specified rcu_node structure. If there are no such
595 * tasks, report it up the rcu_node hierarchy. 634 * tasks, report it up the rcu_node hierarchy.
596 * 635 *
597 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. 636 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
598 */ 637 */
599 static void 638 static void
600 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 639 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
601 { 640 {
602 int must_wait; 641 int must_wait;
603 642
604 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 643 raw_spin_lock(&rnp->lock); /* irqs already disabled */
605 list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); 644 list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
606 list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); 645 list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
607 must_wait = rcu_preempted_readers_exp(rnp); 646 must_wait = rcu_preempted_readers_exp(rnp);
608 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 647 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
609 if (!must_wait) 648 if (!must_wait)
610 rcu_report_exp_rnp(rsp, rnp); 649 rcu_report_exp_rnp(rsp, rnp);
611 } 650 }
612 651
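The two list_splice_init() calls above move every entry from the normal blocked-tasks lists onto the expedited lists in constant time, leaving the source lists empty but still initialized. A stand-alone illustration of that primitive with a hypothetical item type:

#include <linux/list.h>

struct item {
        struct list_head entry;
        int id;
};

static LIST_HEAD(pending);              /* plays the role of blocked_tasks[0] */
static LIST_HEAD(expedited);            /* plays the role of blocked_tasks[2] */

static void snapshot_pending(void)
{
        /* O(1): splice the whole list over and reinitialize the source. */
        list_splice_init(&pending, &expedited);
}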
613 /* 652 /*
614 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 653 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
615 * is to invoke synchronize_sched_expedited() to push all the tasks to 654 * is to invoke synchronize_sched_expedited() to push all the tasks to
616 * the ->blocked_tasks[] lists, move all entries from the first set of 655 * the ->blocked_tasks[] lists, move all entries from the first set of
617 * ->blocked_tasks[] lists to the second set, and finally wait for this 656 * ->blocked_tasks[] lists to the second set, and finally wait for this
618 * second set to drain. 657 * second set to drain.
619 */ 658 */
620 void synchronize_rcu_expedited(void) 659 void synchronize_rcu_expedited(void)
621 { 660 {
622 unsigned long flags; 661 unsigned long flags;
623 struct rcu_node *rnp; 662 struct rcu_node *rnp;
624 struct rcu_state *rsp = &rcu_preempt_state; 663 struct rcu_state *rsp = &rcu_preempt_state;
625 long snap; 664 long snap;
626 int trycount = 0; 665 int trycount = 0;
627 666
628 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 667 smp_mb(); /* Caller's modifications seen first by other CPUs. */
629 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 668 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
630 smp_mb(); /* Above access cannot bleed into critical section. */ 669 smp_mb(); /* Above access cannot bleed into critical section. */
631 670
632 /* 671 /*
633 * Acquire lock, falling back to synchronize_rcu() if too many 672 * Acquire lock, falling back to synchronize_rcu() if too many
634 * lock-acquisition failures. Of course, if someone does the 673 * lock-acquisition failures. Of course, if someone does the
635 * expedited grace period for us, just leave. 674 * expedited grace period for us, just leave.
636 */ 675 */
637 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 676 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
638 if (trycount++ < 10) 677 if (trycount++ < 10)
639 udelay(trycount * num_online_cpus()); 678 udelay(trycount * num_online_cpus());
640 else { 679 else {
641 synchronize_rcu(); 680 synchronize_rcu();
642 return; 681 return;
643 } 682 }
644 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 683 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
645 goto mb_ret; /* Others did our work for us. */ 684 goto mb_ret; /* Others did our work for us. */
646 } 685 }
647 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 686 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
648 goto unlock_mb_ret; /* Others did our work for us. */ 687 goto unlock_mb_ret; /* Others did our work for us. */
649 688
650 /* force all RCU readers onto blocked_tasks[]. */ 689 /* force all RCU readers onto blocked_tasks[]. */
651 synchronize_sched_expedited(); 690 synchronize_sched_expedited();
652 691
653 raw_spin_lock_irqsave(&rsp->onofflock, flags); 692 raw_spin_lock_irqsave(&rsp->onofflock, flags);
654 693
655 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 694 /* Initialize ->expmask for all non-leaf rcu_node structures. */
656 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 695 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
657 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 696 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
658 rnp->expmask = rnp->qsmaskinit; 697 rnp->expmask = rnp->qsmaskinit;
659 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 698 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
660 } 699 }
661 700
662 /* Snapshot current state of ->blocked_tasks[] lists. */ 701 /* Snapshot current state of ->blocked_tasks[] lists. */
663 rcu_for_each_leaf_node(rsp, rnp) 702 rcu_for_each_leaf_node(rsp, rnp)
664 sync_rcu_preempt_exp_init(rsp, rnp); 703 sync_rcu_preempt_exp_init(rsp, rnp);
665 if (NUM_RCU_NODES > 1) 704 if (NUM_RCU_NODES > 1)
666 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 705 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
667 706
668 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 707 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
669 708
670 /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ 709 /* Wait for snapshotted ->blocked_tasks[] lists to drain. */
671 rnp = rcu_get_root(rsp); 710 rnp = rcu_get_root(rsp);
672 wait_event(sync_rcu_preempt_exp_wq, 711 wait_event(sync_rcu_preempt_exp_wq,
673 sync_rcu_preempt_exp_done(rnp)); 712 sync_rcu_preempt_exp_done(rnp));
674 713
675 /* Clean up and exit. */ 714 /* Clean up and exit. */
676 smp_mb(); /* ensure expedited GP seen before counter increment. */ 715 smp_mb(); /* ensure expedited GP seen before counter increment. */
677 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 716 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
678 unlock_mb_ret: 717 unlock_mb_ret:
679 mutex_unlock(&sync_rcu_preempt_exp_mutex); 718 mutex_unlock(&sync_rcu_preempt_exp_mutex);
680 mb_ret: 719 mb_ret:
681 smp_mb(); /* ensure subsequent action seen after grace period. */ 720 smp_mb(); /* ensure subsequent action seen after grace period. */
682 } 721 }
683 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 722 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
684 723
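From an updater's point of view, synchronize_rcu_expedited() is a drop-in replacement for synchronize_rcu() that trades extra work (including the IPIs behind synchronize_sched_expedited()) for much lower grace-period latency, and as the code above shows it even falls back to synchronize_rcu() if the mutex stays contended. A minimal updater sketch; struct cfg, cur_cfg, and cfg_lock are hypothetical names, not part of this file:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct cfg {
        int value;
};

static struct cfg *cur_cfg;             /* readers use rcu_dereference() */
static DEFINE_SPINLOCK(cfg_lock);

/* Publish a new configuration and free the old one with minimal delay. */
static void set_cfg(struct cfg *newp)
{
        struct cfg *oldp;

        spin_lock(&cfg_lock);
        oldp = cur_cfg;                 /* updater-side access, lock held */
        rcu_assign_pointer(cur_cfg, newp);
        spin_unlock(&cfg_lock);

        synchronize_rcu_expedited();    /* pre-existing readers drain quickly */
        kfree(oldp);
}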
685 /* 724 /*
686 * Check to see if there is any immediate preemptable-RCU-related work 725 * Check to see if there is any immediate preemptable-RCU-related work
687 * to be done. 726 * to be done.
688 */ 727 */
689 static int rcu_preempt_pending(int cpu) 728 static int rcu_preempt_pending(int cpu)
690 { 729 {
691 return __rcu_pending(&rcu_preempt_state, 730 return __rcu_pending(&rcu_preempt_state,
692 &per_cpu(rcu_preempt_data, cpu)); 731 &per_cpu(rcu_preempt_data, cpu));
693 } 732 }
694 733
695 /* 734 /*
696 * Does preemptable RCU need the CPU to stay out of dynticks mode? 735 * Does preemptable RCU need the CPU to stay out of dynticks mode?
697 */ 736 */
698 static int rcu_preempt_needs_cpu(int cpu) 737 static int rcu_preempt_needs_cpu(int cpu)
699 { 738 {
700 return !!per_cpu(rcu_preempt_data, cpu).nxtlist; 739 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
701 } 740 }
702 741
703 /** 742 /**
704 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 743 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
705 */ 744 */
706 void rcu_barrier(void) 745 void rcu_barrier(void)
707 { 746 {
708 _rcu_barrier(&rcu_preempt_state, call_rcu); 747 _rcu_barrier(&rcu_preempt_state, call_rcu);
709 } 748 }
710 EXPORT_SYMBOL_GPL(rcu_barrier); 749 EXPORT_SYMBOL_GPL(rcu_barrier);
711 750
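rcu_barrier() does not wait for a grace period as such; it waits until every callback already handed to call_rcu() has been invoked, which is exactly what a module must do before tearing down resources those callbacks touch. A typical shutdown sketch, where foo_cache and foo_exit are hypothetical:

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static struct kmem_cache *foo_cache;    /* objects freed via call_rcu() */

static void __exit foo_exit(void)
{
        /* ...unpublish remaining objects and call_rcu() their frees... */

        /*
         * Wait for all outstanding RCU callbacks to run so that none
         * of them touches foo_cache after it has been destroyed.
         */
        rcu_barrier();
        kmem_cache_destroy(foo_cache);
}
module_exit(foo_exit);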
712 /* 751 /*
713 * Initialize preemptable RCU's per-CPU data. 752 * Initialize preemptable RCU's per-CPU data.
714 */ 753 */
715 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 754 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
716 { 755 {
717 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); 756 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
718 } 757 }
719 758
720 /* 759 /*
721 * Move preemptable RCU's callbacks to ->orphan_cbs_list. 760 * Move preemptable RCU's callbacks to ->orphan_cbs_list.
722 */ 761 */
723 static void rcu_preempt_send_cbs_to_orphanage(void) 762 static void rcu_preempt_send_cbs_to_orphanage(void)
724 { 763 {
725 rcu_send_cbs_to_orphanage(&rcu_preempt_state); 764 rcu_send_cbs_to_orphanage(&rcu_preempt_state);
726 } 765 }
727 766
728 /* 767 /*
729 * Initialize preemptable RCU's state structures. 768 * Initialize preemptable RCU's state structures.
730 */ 769 */
731 static void __init __rcu_init_preempt(void) 770 static void __init __rcu_init_preempt(void)
732 { 771 {
733 RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); 772 RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
734 } 773 }
735 774
736 /* 775 /*
737 * Check for a task exiting while in a preemptable-RCU read-side 776 * Check for a task exiting while in a preemptable-RCU read-side
738 * critical section, clean up if so. No need to issue warnings, 777 * critical section, clean up if so. No need to issue warnings,
739 * as debug_check_no_locks_held() already does this if lockdep 778 * as debug_check_no_locks_held() already does this if lockdep
740 * is enabled. 779 * is enabled.
741 */ 780 */
742 void exit_rcu(void) 781 void exit_rcu(void)
743 { 782 {
744 struct task_struct *t = current; 783 struct task_struct *t = current;
745 784
746 if (t->rcu_read_lock_nesting == 0) 785 if (t->rcu_read_lock_nesting == 0)
747 return; 786 return;
748 t->rcu_read_lock_nesting = 1; 787 t->rcu_read_lock_nesting = 1;
749 rcu_read_unlock(); 788 rcu_read_unlock();
750 } 789 }
751 790
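exit_rcu() only matters for the buggy case of a task exiting while still inside an RCU read-side critical section: it forces the nesting count to one and performs the final rcu_read_unlock() so that preemptible RCU's bookkeeping stays consistent. For contrast, the balanced reader pattern it compensates for looks like the following sketch (hypothetical names, mirroring the updater sketch earlier in this file):

#include <linux/rcupdate.h>

struct cfg {
        int value;
};

static struct cfg *cur_cfg;             /* published with rcu_assign_pointer() */

static int read_cfg_value(void)
{
        struct cfg *p;
        int val = -1;

        rcu_read_lock();                /* nesting count goes to 1 */
        p = rcu_dereference(cur_cfg);
        if (p)
                val = p->value;
        rcu_read_unlock();              /* nesting count returns to 0 */
        return val;
}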
752 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 791 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
753 792
754 /* 793 /*
755 * Tell them what RCU they are running. 794 * Tell them what RCU they are running.
756 */ 795 */
757 static void __init rcu_bootup_announce(void) 796 static void __init rcu_bootup_announce(void)
758 { 797 {
759 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 798 printk(KERN_INFO "Hierarchical RCU implementation.\n");
799 rcu_bootup_announce_oddness();
760 } 800 }
761 801
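rcu_bootup_announce_oddness(), added by this patch and defined earlier in the file, emits one KERN_INFO line per RCU configuration choice that departs from the defaults, so a stock configuration prints nothing beyond the announcement line above. Purely as a hypothetical illustration of that style of conditional boot-time reporting (not the patch's actual helper or its exact messages):

#include <linux/kernel.h>
#include <linux/init.h>

/* Hypothetical example only -- not the helper added by this patch. */
static void __init announce_oddness_example(void)
{
#ifdef CONFIG_RCU_TRACE
        printk(KERN_INFO "\tRCU tracing is enabled.\n");
#endif
#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
        printk(KERN_INFO "\tRCU CPU stall detection is disabled.\n");
#endif
#if NUM_RCU_LVLS >= 4
        printk(KERN_INFO "\tFour-level rcu_node hierarchy is enabled.\n");
#endif
}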
762 /* 802 /*
763 * Return the number of RCU batches processed thus far for debug & stats. 803 * Return the number of RCU batches processed thus far for debug & stats.
764 */ 804 */
765 long rcu_batches_completed(void) 805 long rcu_batches_completed(void)
766 { 806 {
767 return rcu_batches_completed_sched(); 807 return rcu_batches_completed_sched();
768 } 808 }
769 EXPORT_SYMBOL_GPL(rcu_batches_completed); 809 EXPORT_SYMBOL_GPL(rcu_batches_completed);
770 810
771 /* 811 /*
772 * Force a quiescent state for RCU, which, because there is no preemptible 812 * Force a quiescent state for RCU, which, because there is no preemptible
773 * RCU, becomes the same as rcu-sched. 813 * RCU, becomes the same as rcu-sched.
774 */ 814 */
775 void rcu_force_quiescent_state(void) 815 void rcu_force_quiescent_state(void)
776 { 816 {
777 rcu_sched_force_quiescent_state(); 817 rcu_sched_force_quiescent_state();
778 } 818 }
779 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 819 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
780 820
781 /* 821 /*
782 * Because preemptable RCU does not exist, we never have to check for 822 * Because preemptable RCU does not exist, we never have to check for
783 * CPUs being in quiescent states. 823 * CPUs being in quiescent states.
784 */ 824 */
785 static void rcu_preempt_note_context_switch(int cpu) 825 static void rcu_preempt_note_context_switch(int cpu)
786 { 826 {
787 } 827 }
788 828
789 /* 829 /*
790 * Because preemptable RCU does not exist, there are never any preempted 830 * Because preemptable RCU does not exist, there are never any preempted
791 * RCU readers. 831 * RCU readers.
792 */ 832 */
793 static int rcu_preempted_readers(struct rcu_node *rnp) 833 static int rcu_preempted_readers(struct rcu_node *rnp)
794 { 834 {
795 return 0; 835 return 0;
796 } 836 }
797 837
798 #ifdef CONFIG_HOTPLUG_CPU 838 #ifdef CONFIG_HOTPLUG_CPU
799 839
800 /* Because preemptible RCU does not exist, no quieting of tasks. */ 840 /* Because preemptible RCU does not exist, no quieting of tasks. */
801 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 841 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
802 { 842 {
803 raw_spin_unlock_irqrestore(&rnp->lock, flags); 843 raw_spin_unlock_irqrestore(&rnp->lock, flags);
804 } 844 }
805 845
806 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 846 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
807 847
808 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR 848 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
809 849
810 /* 850 /*
811 * Because preemptable RCU does not exist, we never have to check for 851 * Because preemptable RCU does not exist, we never have to check for
812 * tasks blocked within RCU read-side critical sections. 852 * tasks blocked within RCU read-side critical sections.
813 */ 853 */
814 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 854 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
815 { 855 {
816 } 856 }
817 857
818 /* 858 /*
819 * Because preemptable RCU does not exist, we never have to check for 859 * Because preemptable RCU does not exist, we never have to check for
820 * tasks blocked within RCU read-side critical sections. 860 * tasks blocked within RCU read-side critical sections.
821 */ 861 */
822 static void rcu_print_task_stall(struct rcu_node *rnp) 862 static void rcu_print_task_stall(struct rcu_node *rnp)
823 { 863 {
824 } 864 }
825 865
826 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 866 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
827 867
828 /* 868 /*
829 * Because there is no preemptable RCU, there can be no readers blocked, 869 * Because there is no preemptable RCU, there can be no readers blocked,
830 * so there is no need to check for blocked tasks. So check only for 870 * so there is no need to check for blocked tasks. So check only for
831 * bogus qsmask values. 871 * bogus qsmask values.
832 */ 872 */
833 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 873 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
834 { 874 {
835 WARN_ON_ONCE(rnp->qsmask); 875 WARN_ON_ONCE(rnp->qsmask);
836 } 876 }
837 877
838 #ifdef CONFIG_HOTPLUG_CPU 878 #ifdef CONFIG_HOTPLUG_CPU
839 879
840 /* 880 /*
841 * Because preemptable RCU does not exist, it never needs to migrate 881 * Because preemptable RCU does not exist, it never needs to migrate
842 * tasks that were blocked within RCU read-side critical sections, and 882 * tasks that were blocked within RCU read-side critical sections, and
843 * such non-existent tasks cannot possibly have been blocking the current 883 * such non-existent tasks cannot possibly have been blocking the current
844 * grace period. 884 * grace period.
845 */ 885 */
846 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 886 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
847 struct rcu_node *rnp, 887 struct rcu_node *rnp,
848 struct rcu_data *rdp) 888 struct rcu_data *rdp)
849 { 889 {
850 return 0; 890 return 0;
851 } 891 }
852 892
853 /* 893 /*
854 * Because preemptable RCU does not exist, it never needs CPU-offline 894 * Because preemptable RCU does not exist, it never needs CPU-offline
855 * processing. 895 * processing.
856 */ 896 */
857 static void rcu_preempt_offline_cpu(int cpu) 897 static void rcu_preempt_offline_cpu(int cpu)
858 { 898 {
859 } 899 }
860 900
861 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 901 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
862 902
863 /* 903 /*
864 * Because preemptable RCU does not exist, it never has any callbacks 904 * Because preemptable RCU does not exist, it never has any callbacks
865 * to check. 905 * to check.
866 */ 906 */
867 static void rcu_preempt_check_callbacks(int cpu) 907 static void rcu_preempt_check_callbacks(int cpu)
868 { 908 {
869 } 909 }
870 910
871 /* 911 /*
872 * Because preemptable RCU does not exist, it never has any callbacks 912 * Because preemptable RCU does not exist, it never has any callbacks
873 * to process. 913 * to process.
874 */ 914 */
875 static void rcu_preempt_process_callbacks(void) 915 static void rcu_preempt_process_callbacks(void)
876 { 916 {
877 } 917 }
878 918
879 /* 919 /*
880 * In classic RCU, call_rcu() is just call_rcu_sched(). 920 * In classic RCU, call_rcu() is just call_rcu_sched().
881 */ 921 */
882 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 922 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
883 { 923 {
884 call_rcu_sched(head, func); 924 call_rcu_sched(head, func);
885 } 925 }
886 EXPORT_SYMBOL_GPL(call_rcu); 926 EXPORT_SYMBOL_GPL(call_rcu);
887 927
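Whichever flavor backs it, call_rcu() queues a callback that runs only after all pre-existing readers have finished, and the callback typically recovers its enclosing object with container_of() before freeing it. A minimal sketch; struct foo and its helpers are hypothetical:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int data;
        struct rcu_head rcu;
};

/* Runs after a grace period; no reader can still hold a reference. */
static void foo_reclaim(struct rcu_head *head)
{
        struct foo *fp = container_of(head, struct foo, rcu);

        kfree(fp);
}

/* Defer the free until all pre-existing read-side critical sections end. */
static void foo_retire(struct foo *fp)
{
        call_rcu(&fp->rcu, foo_reclaim);
}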
888 /* 928 /*
889 * Wait for an rcu-preempt grace period, but make it happen quickly. 929 * Wait for an rcu-preempt grace period, but make it happen quickly.
890 * But because preemptable RCU does not exist, map to rcu-sched. 930 * But because preemptable RCU does not exist, map to rcu-sched.
891 */ 931 */
892 void synchronize_rcu_expedited(void) 932 void synchronize_rcu_expedited(void)
893 { 933 {
894 synchronize_sched_expedited(); 934 synchronize_sched_expedited();
895 } 935 }
896 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 936 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
897 937
898 #ifdef CONFIG_HOTPLUG_CPU 938 #ifdef CONFIG_HOTPLUG_CPU
899 939
900 /* 940 /*
901 * Because preemptable RCU does not exist, there is never any need to 941 * Because preemptable RCU does not exist, there is never any need to
902 * report on tasks preempted in RCU read-side critical sections during 942 * report on tasks preempted in RCU read-side critical sections during
903 * expedited RCU grace periods. 943 * expedited RCU grace periods.
904 */ 944 */
905 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 945 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
906 { 946 {
907 return; 947 return;
908 } 948 }
909 949
910 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 950 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
911 951
912 /* 952 /*
913 * Because preemptable RCU does not exist, it never has any work to do. 953 * Because preemptable RCU does not exist, it never has any work to do.
914 */ 954 */
915 static int rcu_preempt_pending(int cpu) 955 static int rcu_preempt_pending(int cpu)
916 { 956 {
917 return 0; 957 return 0;
918 } 958 }
919 959
920 /* 960 /*
921 * Because preemptable RCU does not exist, it never needs any CPU. 961 * Because preemptable RCU does not exist, it never needs any CPU.
922 */ 962 */
923 static int rcu_preempt_needs_cpu(int cpu) 963 static int rcu_preempt_needs_cpu(int cpu)
924 { 964 {
925 return 0; 965 return 0;
926 } 966 }
927 967
928 /* 968 /*
929 * Because preemptable RCU does not exist, rcu_barrier() is just 969 * Because preemptable RCU does not exist, rcu_barrier() is just
930 * another name for rcu_barrier_sched(). 970 * another name for rcu_barrier_sched().
931 */ 971 */
932 void rcu_barrier(void) 972 void rcu_barrier(void)
933 { 973 {
934 rcu_barrier_sched(); 974 rcu_barrier_sched();
935 } 975 }
936 EXPORT_SYMBOL_GPL(rcu_barrier); 976 EXPORT_SYMBOL_GPL(rcu_barrier);
937 977
938 /* 978 /*
939 * Because preemptable RCU does not exist, there is no per-CPU 979 * Because preemptable RCU does not exist, there is no per-CPU
940 * data to initialize. 980 * data to initialize.
941 */ 981 */
942 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 982 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
943 { 983 {
944 } 984 }
945 985
946 /* 986 /*
947 * Because there is no preemptable RCU, there are no callbacks to move. 987 * Because there is no preemptable RCU, there are no callbacks to move.
948 */ 988 */
949 static void rcu_preempt_send_cbs_to_orphanage(void) 989 static void rcu_preempt_send_cbs_to_orphanage(void)
950 { 990 {
951 } 991 }
952 992
953 /* 993 /*
954 * Because preemptable RCU does not exist, it need not be initialized. 994 * Because preemptable RCU does not exist, it need not be initialized.
955 */ 995 */
956 static void __init __rcu_init_preempt(void) 996 static void __init __rcu_init_preempt(void)
957 { 997 {
958 } 998 }
959 999
960 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1000 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
961 1001
962 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1002 #if !defined(CONFIG_RCU_FAST_NO_HZ)
963 1003
964 /* 1004 /*
965 * Check to see if any future RCU-related work will need to be done 1005 * Check to see if any future RCU-related work will need to be done
966 * by the current CPU, even if none need be done immediately, returning 1006 * by the current CPU, even if none need be done immediately, returning
967 * 1 if so. This function is part of the RCU implementation; it is -not- 1007 * 1 if so. This function is part of the RCU implementation; it is -not-
968 * an exported member of the RCU API. 1008 * an exported member of the RCU API.
969 * 1009 *
970 * Because we have preemptible RCU, just check whether this CPU needs 1010 * Because we have preemptible RCU, just check whether this CPU needs
971 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption 1011 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
972 * disabled in a most-likely vain attempt to cause RCU not to need this CPU. 1012 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
973 */ 1013 */
974 int rcu_needs_cpu(int cpu) 1014 int rcu_needs_cpu(int cpu)
975 { 1015 {
976 return rcu_needs_cpu_quick_check(cpu); 1016 return rcu_needs_cpu_quick_check(cpu);
977 } 1017 }
978 1018
979 /* 1019 /*
980 * Check to see if we need to continue a callback-flush operation to 1020 * Check to see if we need to continue a callback-flush operation to
981 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle 1021 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
982 * entry is not configured, so we never do need to. 1022 * entry is not configured, so we never do need to.
983 */ 1023 */
984 static void rcu_needs_cpu_flush(void) 1024 static void rcu_needs_cpu_flush(void)
985 { 1025 {
986 } 1026 }
987 1027
988 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1028 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
989 1029
990 #define RCU_NEEDS_CPU_FLUSHES 5 1030 #define RCU_NEEDS_CPU_FLUSHES 5
991 static DEFINE_PER_CPU(int, rcu_dyntick_drain); 1031 static DEFINE_PER_CPU(int, rcu_dyntick_drain);
992 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 1032 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
993 1033
994 /* 1034 /*
995 * Check to see if any future RCU-related work will need to be done 1035 * Check to see if any future RCU-related work will need to be done
996 * by the current CPU, even if none need be done immediately, returning 1036 * by the current CPU, even if none need be done immediately, returning
997 * 1 if so. This function is part of the RCU implementation; it is -not- 1037 * 1 if so. This function is part of the RCU implementation; it is -not-
998 * an exported member of the RCU API. 1038 * an exported member of the RCU API.
999 * 1039 *
1000 * Because we are not supporting preemptible RCU, attempt to accelerate 1040 * Because we are not supporting preemptible RCU, attempt to accelerate
1001 * any current grace periods so that RCU no longer needs this CPU, but 1041 * any current grace periods so that RCU no longer needs this CPU, but
1002 * only if all other CPUs are already in dynticks-idle mode. This will 1042 * only if all other CPUs are already in dynticks-idle mode. This will
1003 * allow the CPU cores to be powered down immediately, as opposed to after 1043 * allow the CPU cores to be powered down immediately, as opposed to after
1004 * waiting many milliseconds for grace periods to elapse. 1044 * waiting many milliseconds for grace periods to elapse.
1005 * 1045 *
1006 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1046 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1007 * disabled, we do one pass of force_quiescent_state(), then do a 1047 * disabled, we do one pass of force_quiescent_state(), then do a
1008 * raise_softirq() to cause rcu_process_callbacks() to be invoked later. 1048 * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
1009 * The per-cpu rcu_dyntick_drain variable controls the sequencing. 1049 * The per-cpu rcu_dyntick_drain variable controls the sequencing.
1010 */ 1050 */
1011 int rcu_needs_cpu(int cpu) 1051 int rcu_needs_cpu(int cpu)
1012 { 1052 {
1013 int c = 0; 1053 int c = 0;
1014 int thatcpu; 1054 int thatcpu;
1015 1055
1016 /* Check for being in the holdoff period. */ 1056 /* Check for being in the holdoff period. */
1017 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) 1057 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1018 return rcu_needs_cpu_quick_check(cpu); 1058 return rcu_needs_cpu_quick_check(cpu);
1019 1059
1020 /* Don't bother unless we are the last non-dyntick-idle CPU. */ 1060 /* Don't bother unless we are the last non-dyntick-idle CPU. */
1021 for_each_cpu_not(thatcpu, nohz_cpu_mask) 1061 for_each_cpu_not(thatcpu, nohz_cpu_mask)
1022 if (cpu_online(thatcpu) && thatcpu != cpu) { 1062 if (cpu_online(thatcpu) && thatcpu != cpu) {
1023 per_cpu(rcu_dyntick_drain, cpu) = 0; 1063 per_cpu(rcu_dyntick_drain, cpu) = 0;
1024 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 1064 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1025 return rcu_needs_cpu_quick_check(cpu); 1065 return rcu_needs_cpu_quick_check(cpu);
1026 } 1066 }
1027 1067
1028 /* Check and update the rcu_dyntick_drain sequencing. */ 1068 /* Check and update the rcu_dyntick_drain sequencing. */
1029 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { 1069 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1030 /* First time through, initialize the counter. */ 1070 /* First time through, initialize the counter. */
1031 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; 1071 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
1032 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 1072 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1033 /* We have hit the limit, so time to give up. */ 1073 /* We have hit the limit, so time to give up. */
1034 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 1074 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1035 return rcu_needs_cpu_quick_check(cpu); 1075 return rcu_needs_cpu_quick_check(cpu);
1036 } 1076 }
1037 1077
1038 /* Do one step pushing remaining RCU callbacks through. */ 1078 /* Do one step pushing remaining RCU callbacks through. */
1039 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 1079 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
1040 rcu_sched_qs(cpu); 1080 rcu_sched_qs(cpu);
1041 force_quiescent_state(&rcu_sched_state, 0); 1081 force_quiescent_state(&rcu_sched_state, 0);
1042 c = c || per_cpu(rcu_sched_data, cpu).nxtlist; 1082 c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
1043 } 1083 }
1044 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 1084 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
1045 rcu_bh_qs(cpu); 1085 rcu_bh_qs(cpu);
1046 force_quiescent_state(&rcu_bh_state, 0); 1086 force_quiescent_state(&rcu_bh_state, 0);
1047 c = c || per_cpu(rcu_bh_data, cpu).nxtlist; 1087 c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
1048 } 1088 }
1049 1089
1050 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1090 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1051 if (c) 1091 if (c)
1052 raise_softirq(RCU_SOFTIRQ); 1092 raise_softirq(RCU_SOFTIRQ);
1053 return c; 1093 return c;
1054 } 1094 }
1055 1095
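The sequencing above is a small per-CPU state machine: rcu_dyntick_drain budgets RCU_NEEDS_CPU_FLUSHES attempts per episode, and rcu_dyntick_holdoff, once stamped with the current jiffies value, suppresses further attempts for the rest of that jiffy. A stripped-down sketch of just that bookkeeping, with hypothetical names:

#include <linux/percpu.h>
#include <linux/jiffies.h>

#define DRAIN_TRIES 5

static DEFINE_PER_CPU(int, drain_budget);
static DEFINE_PER_CPU(unsigned long, drain_holdoff);

/* Return nonzero if this CPU should attempt one more drain pass. */
static int may_drain(int cpu)
{
        if (per_cpu(drain_holdoff, cpu) == jiffies)
                return 0;                               /* holding off */
        if (per_cpu(drain_budget, cpu) <= 0) {
                per_cpu(drain_budget, cpu) = DRAIN_TRIES;  /* new episode */
        } else if (--per_cpu(drain_budget, cpu) <= 0) {
                per_cpu(drain_holdoff, cpu) = jiffies;  /* give up this jiffy */
                return 0;
        }
        return 1;
}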
1056 /* 1096 /*
1057 * Check to see if we need to continue a callback-flush operation to 1097 * Check to see if we need to continue a callback-flush operation to
1058 * allow the last CPU to enter dyntick-idle mode. 1098 * allow the last CPU to enter dyntick-idle mode.
1059 */ 1099 */
1060 static void rcu_needs_cpu_flush(void) 1100 static void rcu_needs_cpu_flush(void)
1061 { 1101 {
1062 int cpu = smp_processor_id(); 1102 int cpu = smp_processor_id();
1063 unsigned long flags; 1103 unsigned long flags;
1064 1104
1065 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) 1105 if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
1066 return; 1106 return;
1067 local_irq_save(flags); 1107 local_irq_save(flags);
1068 (void)rcu_needs_cpu(cpu); 1108 (void)rcu_needs_cpu(cpu);
1069 local_irq_restore(flags); 1109 local_irq_restore(flags);
1070 } 1110 }
1071 1111
1072 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1112 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1073 1113