Commit 1217ed1ba5c67393293dfb0f03c353b118dadeb4

Authored by Paul E. McKenney
Committed by Paul E. McKenney
1 parent 29ce831000

rcu: permit rcu_read_unlock() to be called while holding runqueue locks

Avoid calling into the scheduler while holding core RCU locks.  This
allows rcu_read_unlock() to be called while holding the runqueue locks,
but only as long as there was no chance of the RCU read-side critical
section having been preempted.  (Otherwise, if RCU priority boosting
is enabled, rcu_read_unlock() might call into the scheduler in order to
unboost itself, which could allow self-deadlock on the runqueue locks
within the scheduler.)
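
As an aside, here is a minimal userspace sketch of the self-deadlock pattern being
avoided (an analogy only, not part of this change; the pthread lock and the
unboost() helper are hypothetical stand-ins for the runqueue lock and the
scheduler's unboost path):

#include <pthread.h>

/* Stand-in for a runqueue lock; a default pthread mutex is not recursive. */
pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogous to the scheduler's unboost path, which takes the runqueue lock. */
void unboost(void)
{
	pthread_mutex_lock(&rq_lock);	/* self-deadlock: this thread already holds rq_lock */
	pthread_mutex_unlock(&rq_lock);
}

/* Analogous to rcu_read_unlock() being invoked with the runqueue lock held. */
void read_unlock_with_rq_lock_held(void)
{
	pthread_mutex_lock(&rq_lock);	/* caller holds the "runqueue lock" */
	unboost();			/* never returns: blocks on rq_lock */
	pthread_mutex_unlock(&rq_lock);
}

Because the inner lock attempt can never succeed, the safe approach is the one
taken here: skip the unboost path whenever the read-side critical section could
not have been preempted.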

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Showing 3 changed files with 34 additions and 79 deletions

1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 * 23 *
24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 * 26 *
27 * For detailed explanation of Read-Copy Update mechanism see - 27 * For detailed explanation of Read-Copy Update mechanism see -
28 * Documentation/RCU 28 * Documentation/RCU
29 */ 29 */
30 #include <linux/types.h> 30 #include <linux/types.h>
31 #include <linux/kernel.h> 31 #include <linux/kernel.h>
32 #include <linux/init.h> 32 #include <linux/init.h>
33 #include <linux/spinlock.h> 33 #include <linux/spinlock.h>
34 #include <linux/smp.h> 34 #include <linux/smp.h>
35 #include <linux/rcupdate.h> 35 #include <linux/rcupdate.h>
36 #include <linux/interrupt.h> 36 #include <linux/interrupt.h>
37 #include <linux/sched.h> 37 #include <linux/sched.h>
38 #include <linux/nmi.h> 38 #include <linux/nmi.h>
39 #include <asm/atomic.h> 39 #include <asm/atomic.h>
40 #include <linux/bitops.h> 40 #include <linux/bitops.h>
41 #include <linux/module.h> 41 #include <linux/module.h>
42 #include <linux/completion.h> 42 #include <linux/completion.h>
43 #include <linux/moduleparam.h> 43 #include <linux/moduleparam.h>
44 #include <linux/percpu.h> 44 #include <linux/percpu.h>
45 #include <linux/notifier.h> 45 #include <linux/notifier.h>
46 #include <linux/cpu.h> 46 #include <linux/cpu.h>
47 #include <linux/mutex.h> 47 #include <linux/mutex.h>
48 #include <linux/time.h> 48 #include <linux/time.h>
49 #include <linux/kernel_stat.h> 49 #include <linux/kernel_stat.h>
50 #include <linux/wait.h> 50 #include <linux/wait.h>
51 #include <linux/kthread.h> 51 #include <linux/kthread.h>
52 52
53 #include "rcutree.h" 53 #include "rcutree.h"
54 54
55 /* Data structures. */ 55 /* Data structures. */
56 56
57 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 57 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
58 58
59 #define RCU_STATE_INITIALIZER(structname) { \ 59 #define RCU_STATE_INITIALIZER(structname) { \
60 .level = { &structname.node[0] }, \ 60 .level = { &structname.node[0] }, \
61 .levelcnt = { \ 61 .levelcnt = { \
62 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 62 NUM_RCU_LVL_0, /* root of hierarchy. */ \
63 NUM_RCU_LVL_1, \ 63 NUM_RCU_LVL_1, \
64 NUM_RCU_LVL_2, \ 64 NUM_RCU_LVL_2, \
65 NUM_RCU_LVL_3, \ 65 NUM_RCU_LVL_3, \
66 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ 66 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
67 }, \ 67 }, \
68 .signaled = RCU_GP_IDLE, \ 68 .signaled = RCU_GP_IDLE, \
69 .gpnum = -300, \ 69 .gpnum = -300, \
70 .completed = -300, \ 70 .completed = -300, \
71 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ 71 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
72 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ 72 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
73 .n_force_qs = 0, \ 73 .n_force_qs = 0, \
74 .n_force_qs_ngp = 0, \ 74 .n_force_qs_ngp = 0, \
75 .name = #structname, \ 75 .name = #structname, \
76 } 76 }
77 77
78 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); 78 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
79 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 79 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
80 80
81 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 81 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
82 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 82 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
83 83
84 static struct rcu_state *rcu_state; 84 static struct rcu_state *rcu_state;
85 85
86 int rcu_scheduler_active __read_mostly; 86 int rcu_scheduler_active __read_mostly;
87 EXPORT_SYMBOL_GPL(rcu_scheduler_active); 87 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
88 88
89 /* 89 /*
90 * Control variables for per-CPU and per-rcu_node kthreads. These 90 * Control variables for per-CPU and per-rcu_node kthreads. These
91 * handle all flavors of RCU. 91 * handle all flavors of RCU.
92 */ 92 */
93 static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); 93 static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
94 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 94 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
95 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); 95 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
96 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 96 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
97 static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); 97 static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq);
98 DEFINE_PER_CPU(char, rcu_cpu_has_work); 98 DEFINE_PER_CPU(char, rcu_cpu_has_work);
99 static char rcu_kthreads_spawnable; 99 static char rcu_kthreads_spawnable;
100 100
101 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 101 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
102 static void invoke_rcu_cpu_kthread(void); 102 static void invoke_rcu_cpu_kthread(void);
103 103
104 #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ 104 #define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
105 105
106 /* 106 /*
107 * Track the rcutorture test sequence number and the update version 107 * Track the rcutorture test sequence number and the update version
108 * number within a given test. The rcutorture_testseq is incremented 108 * number within a given test. The rcutorture_testseq is incremented
109 * on every rcutorture module load and unload, so has an odd value 109 * on every rcutorture module load and unload, so has an odd value
110 * when a test is running. The rcutorture_vernum is set to zero 110 * when a test is running. The rcutorture_vernum is set to zero
111 * when rcutorture starts and is incremented on each rcutorture update. 111 * when rcutorture starts and is incremented on each rcutorture update.
112 * These variables enable correlating rcutorture output with the 112 * These variables enable correlating rcutorture output with the
113 * RCU tracing information. 113 * RCU tracing information.
114 */ 114 */
115 unsigned long rcutorture_testseq; 115 unsigned long rcutorture_testseq;
116 unsigned long rcutorture_vernum; 116 unsigned long rcutorture_vernum;
117 117
118 /* 118 /*
119 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 119 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
120 * permit this function to be invoked without holding the root rcu_node 120 * permit this function to be invoked without holding the root rcu_node
121 * structure's ->lock, but of course results can be subject to change. 121 * structure's ->lock, but of course results can be subject to change.
122 */ 122 */
123 static int rcu_gp_in_progress(struct rcu_state *rsp) 123 static int rcu_gp_in_progress(struct rcu_state *rsp)
124 { 124 {
125 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); 125 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
126 } 126 }
127 127
128 /* 128 /*
129 * Note a quiescent state. Because we do not need to know 129 * Note a quiescent state. Because we do not need to know
130 * how many quiescent states passed, just if there was at least 130 * how many quiescent states passed, just if there was at least
131 * one since the start of the grace period, this just sets a flag. 131 * one since the start of the grace period, this just sets a flag.
132 */ 132 */
133 void rcu_sched_qs(int cpu) 133 void rcu_sched_qs(int cpu)
134 { 134 {
135 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 135 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
136 136
137 rdp->passed_quiesc_completed = rdp->gpnum - 1; 137 rdp->passed_quiesc_completed = rdp->gpnum - 1;
138 barrier(); 138 barrier();
139 rdp->passed_quiesc = 1; 139 rdp->passed_quiesc = 1;
140 } 140 }
141 141
142 void rcu_bh_qs(int cpu) 142 void rcu_bh_qs(int cpu)
143 { 143 {
144 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 144 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
145 145
146 rdp->passed_quiesc_completed = rdp->gpnum - 1; 146 rdp->passed_quiesc_completed = rdp->gpnum - 1;
147 barrier(); 147 barrier();
148 rdp->passed_quiesc = 1; 148 rdp->passed_quiesc = 1;
149 } 149 }
150 150
151 /* 151 /*
152 * Note a context switch. This is a quiescent state for RCU-sched, 152 * Note a context switch. This is a quiescent state for RCU-sched,
153 * and requires special handling for preemptible RCU. 153 * and requires special handling for preemptible RCU.
154 */ 154 */
155 void rcu_note_context_switch(int cpu) 155 void rcu_note_context_switch(int cpu)
156 { 156 {
157 rcu_sched_qs(cpu); 157 rcu_sched_qs(cpu);
158 rcu_preempt_note_context_switch(cpu); 158 rcu_preempt_note_context_switch(cpu);
159 } 159 }
160 EXPORT_SYMBOL_GPL(rcu_note_context_switch); 160 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
161 161
162 #ifdef CONFIG_NO_HZ 162 #ifdef CONFIG_NO_HZ
163 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 163 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
164 .dynticks_nesting = 1, 164 .dynticks_nesting = 1,
165 .dynticks = ATOMIC_INIT(1), 165 .dynticks = ATOMIC_INIT(1),
166 }; 166 };
167 #endif /* #ifdef CONFIG_NO_HZ */ 167 #endif /* #ifdef CONFIG_NO_HZ */
168 168
169 static int blimit = 10; /* Maximum callbacks per softirq. */ 169 static int blimit = 10; /* Maximum callbacks per softirq. */
170 static int qhimark = 10000; /* If this many pending, ignore blimit. */ 170 static int qhimark = 10000; /* If this many pending, ignore blimit. */
171 static int qlowmark = 100; /* Once only this many pending, use blimit. */ 171 static int qlowmark = 100; /* Once only this many pending, use blimit. */
172 172
173 module_param(blimit, int, 0); 173 module_param(blimit, int, 0);
174 module_param(qhimark, int, 0); 174 module_param(qhimark, int, 0);
175 module_param(qlowmark, int, 0); 175 module_param(qlowmark, int, 0);
176 176
177 int rcu_cpu_stall_suppress __read_mostly; 177 int rcu_cpu_stall_suppress __read_mostly;
178 module_param(rcu_cpu_stall_suppress, int, 0644); 178 module_param(rcu_cpu_stall_suppress, int, 0644);
179 179
180 static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 180 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
181 static int rcu_pending(int cpu); 181 static int rcu_pending(int cpu);
182 182
183 /* 183 /*
184 * Return the number of RCU-sched batches processed thus far for debug & stats. 184 * Return the number of RCU-sched batches processed thus far for debug & stats.
185 */ 185 */
186 long rcu_batches_completed_sched(void) 186 long rcu_batches_completed_sched(void)
187 { 187 {
188 return rcu_sched_state.completed; 188 return rcu_sched_state.completed;
189 } 189 }
190 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 190 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
191 191
192 /* 192 /*
193 * Return the number of RCU BH batches processed thus far for debug & stats. 193 * Return the number of RCU BH batches processed thus far for debug & stats.
194 */ 194 */
195 long rcu_batches_completed_bh(void) 195 long rcu_batches_completed_bh(void)
196 { 196 {
197 return rcu_bh_state.completed; 197 return rcu_bh_state.completed;
198 } 198 }
199 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 199 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
200 200
201 /* 201 /*
202 * Force a quiescent state for RCU BH. 202 * Force a quiescent state for RCU BH.
203 */ 203 */
204 void rcu_bh_force_quiescent_state(void) 204 void rcu_bh_force_quiescent_state(void)
205 { 205 {
206 force_quiescent_state(&rcu_bh_state, 0); 206 force_quiescent_state(&rcu_bh_state, 0);
207 } 207 }
208 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 208 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
209 209
210 /* 210 /*
211 * Record the number of times rcutorture tests have been initiated and 211 * Record the number of times rcutorture tests have been initiated and
212 * terminated. This information allows the debugfs tracing stats to be 212 * terminated. This information allows the debugfs tracing stats to be
213 * correlated to the rcutorture messages, even when the rcutorture module 213 * correlated to the rcutorture messages, even when the rcutorture module
214 * is being repeatedly loaded and unloaded. In other words, we cannot 214 * is being repeatedly loaded and unloaded. In other words, we cannot
215 * store this state in rcutorture itself. 215 * store this state in rcutorture itself.
216 */ 216 */
217 void rcutorture_record_test_transition(void) 217 void rcutorture_record_test_transition(void)
218 { 218 {
219 rcutorture_testseq++; 219 rcutorture_testseq++;
220 rcutorture_vernum = 0; 220 rcutorture_vernum = 0;
221 } 221 }
222 EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); 222 EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
223 223
224 /* 224 /*
225 * Record the number of writer passes through the current rcutorture test. 225 * Record the number of writer passes through the current rcutorture test.
226 * This is also used to correlate debugfs tracing stats with the rcutorture 226 * This is also used to correlate debugfs tracing stats with the rcutorture
227 * messages. 227 * messages.
228 */ 228 */
229 void rcutorture_record_progress(unsigned long vernum) 229 void rcutorture_record_progress(unsigned long vernum)
230 { 230 {
231 rcutorture_vernum++; 231 rcutorture_vernum++;
232 } 232 }
233 EXPORT_SYMBOL_GPL(rcutorture_record_progress); 233 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
234 234
235 /* 235 /*
236 * Force a quiescent state for RCU-sched. 236 * Force a quiescent state for RCU-sched.
237 */ 237 */
238 void rcu_sched_force_quiescent_state(void) 238 void rcu_sched_force_quiescent_state(void)
239 { 239 {
240 force_quiescent_state(&rcu_sched_state, 0); 240 force_quiescent_state(&rcu_sched_state, 0);
241 } 241 }
242 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); 242 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
243 243
244 /* 244 /*
245 * Does the CPU have callbacks ready to be invoked? 245 * Does the CPU have callbacks ready to be invoked?
246 */ 246 */
247 static int 247 static int
248 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) 248 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
249 { 249 {
250 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; 250 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
251 } 251 }
252 252
253 /* 253 /*
254 * Does the current CPU require a yet-as-unscheduled grace period? 254 * Does the current CPU require a yet-as-unscheduled grace period?
255 */ 255 */
256 static int 256 static int
257 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 257 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
258 { 258 {
259 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); 259 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp);
260 } 260 }
261 261
262 /* 262 /*
263 * Return the root node of the specified rcu_state structure. 263 * Return the root node of the specified rcu_state structure.
264 */ 264 */
265 static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 265 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
266 { 266 {
267 return &rsp->node[0]; 267 return &rsp->node[0];
268 } 268 }
269 269
270 #ifdef CONFIG_SMP 270 #ifdef CONFIG_SMP
271 271
272 /* 272 /*
273 * If the specified CPU is offline, tell the caller that it is in 273 * If the specified CPU is offline, tell the caller that it is in
274 * a quiescent state. Otherwise, whack it with a reschedule IPI. 274 * a quiescent state. Otherwise, whack it with a reschedule IPI.
275 * Grace periods can end up waiting on an offline CPU when that 275 * Grace periods can end up waiting on an offline CPU when that
276 * CPU is in the process of coming online -- it will be added to the 276 * CPU is in the process of coming online -- it will be added to the
277 * rcu_node bitmasks before it actually makes it online. The same thing 277 * rcu_node bitmasks before it actually makes it online. The same thing
278 * can happen while a CPU is in the process of coming online. Because this 278 * can happen while a CPU is in the process of coming online. Because this
279 * race is quite rare, we check for it after detecting that the grace 279 * race is quite rare, we check for it after detecting that the grace
280 * period has been delayed rather than checking each and every CPU 280 * period has been delayed rather than checking each and every CPU
281 * each and every time we start a new grace period. 281 * each and every time we start a new grace period.
282 */ 282 */
283 static int rcu_implicit_offline_qs(struct rcu_data *rdp) 283 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
284 { 284 {
285 /* 285 /*
286 * If the CPU is offline, it is in a quiescent state. We can 286 * If the CPU is offline, it is in a quiescent state. We can
287 * trust its state not to change because interrupts are disabled. 287 * trust its state not to change because interrupts are disabled.
288 */ 288 */
289 if (cpu_is_offline(rdp->cpu)) { 289 if (cpu_is_offline(rdp->cpu)) {
290 rdp->offline_fqs++; 290 rdp->offline_fqs++;
291 return 1; 291 return 1;
292 } 292 }
293 293
294 /* If preemptible RCU, no point in sending reschedule IPI. */ 294 /* If preemptible RCU, no point in sending reschedule IPI. */
295 if (rdp->preemptible) 295 if (rdp->preemptible)
296 return 0; 296 return 0;
297 297
298 /* The CPU is online, so send it a reschedule IPI. */ 298 /* The CPU is online, so send it a reschedule IPI. */
299 if (rdp->cpu != smp_processor_id()) 299 if (rdp->cpu != smp_processor_id())
300 smp_send_reschedule(rdp->cpu); 300 smp_send_reschedule(rdp->cpu);
301 else 301 else
302 set_need_resched(); 302 set_need_resched();
303 rdp->resched_ipi++; 303 rdp->resched_ipi++;
304 return 0; 304 return 0;
305 } 305 }
306 306
307 #endif /* #ifdef CONFIG_SMP */ 307 #endif /* #ifdef CONFIG_SMP */
308 308
309 #ifdef CONFIG_NO_HZ 309 #ifdef CONFIG_NO_HZ
310 310
311 /** 311 /**
312 * rcu_enter_nohz - inform RCU that current CPU is entering nohz 312 * rcu_enter_nohz - inform RCU that current CPU is entering nohz
313 * 313 *
314 * Enter nohz mode, in other words, -leave- the mode in which RCU 314 * Enter nohz mode, in other words, -leave- the mode in which RCU
315 * read-side critical sections can occur. (Though RCU read-side 315 * read-side critical sections can occur. (Though RCU read-side
316 * critical sections can occur in irq handlers in nohz mode, a possibility 316 * critical sections can occur in irq handlers in nohz mode, a possibility
317 * handled by rcu_irq_enter() and rcu_irq_exit()). 317 * handled by rcu_irq_enter() and rcu_irq_exit()).
318 */ 318 */
319 void rcu_enter_nohz(void) 319 void rcu_enter_nohz(void)
320 { 320 {
321 unsigned long flags; 321 unsigned long flags;
322 struct rcu_dynticks *rdtp; 322 struct rcu_dynticks *rdtp;
323 323
324 local_irq_save(flags); 324 local_irq_save(flags);
325 rdtp = &__get_cpu_var(rcu_dynticks); 325 rdtp = &__get_cpu_var(rcu_dynticks);
326 if (--rdtp->dynticks_nesting) { 326 if (--rdtp->dynticks_nesting) {
327 local_irq_restore(flags); 327 local_irq_restore(flags);
328 return; 328 return;
329 } 329 }
330 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 330 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
331 smp_mb__before_atomic_inc(); /* See above. */ 331 smp_mb__before_atomic_inc(); /* See above. */
332 atomic_inc(&rdtp->dynticks); 332 atomic_inc(&rdtp->dynticks);
333 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 333 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
334 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 334 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
335 local_irq_restore(flags); 335 local_irq_restore(flags);
336 336
337 /* If the interrupt queued a callback, get out of dyntick mode. */ 337 /* If the interrupt queued a callback, get out of dyntick mode. */
338 if (in_irq() && 338 if (in_irq() &&
339 (__get_cpu_var(rcu_sched_data).nxtlist || 339 (__get_cpu_var(rcu_sched_data).nxtlist ||
340 __get_cpu_var(rcu_bh_data).nxtlist || 340 __get_cpu_var(rcu_bh_data).nxtlist ||
341 rcu_preempt_needs_cpu(smp_processor_id()))) 341 rcu_preempt_needs_cpu(smp_processor_id())))
342 set_need_resched(); 342 set_need_resched();
343 } 343 }
344 344
345 /* 345 /*
346 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz 346 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
347 * 347 *
348 * Exit nohz mode, in other words, -enter- the mode in which RCU 348 * Exit nohz mode, in other words, -enter- the mode in which RCU
349 * read-side critical sections normally occur. 349 * read-side critical sections normally occur.
350 */ 350 */
351 void rcu_exit_nohz(void) 351 void rcu_exit_nohz(void)
352 { 352 {
353 unsigned long flags; 353 unsigned long flags;
354 struct rcu_dynticks *rdtp; 354 struct rcu_dynticks *rdtp;
355 355
356 local_irq_save(flags); 356 local_irq_save(flags);
357 rdtp = &__get_cpu_var(rcu_dynticks); 357 rdtp = &__get_cpu_var(rcu_dynticks);
358 if (rdtp->dynticks_nesting++) { 358 if (rdtp->dynticks_nesting++) {
359 local_irq_restore(flags); 359 local_irq_restore(flags);
360 return; 360 return;
361 } 361 }
362 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 362 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
363 atomic_inc(&rdtp->dynticks); 363 atomic_inc(&rdtp->dynticks);
364 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 364 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
365 smp_mb__after_atomic_inc(); /* See above. */ 365 smp_mb__after_atomic_inc(); /* See above. */
366 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 366 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
367 local_irq_restore(flags); 367 local_irq_restore(flags);
368 } 368 }
369 369
370 /** 370 /**
371 * rcu_nmi_enter - inform RCU of entry to NMI context 371 * rcu_nmi_enter - inform RCU of entry to NMI context
372 * 372 *
373 * If the CPU was idle with dynamic ticks active, and there is no 373 * If the CPU was idle with dynamic ticks active, and there is no
374 * irq handler running, this updates rdtp->dynticks_nmi to let the 374 * irq handler running, this updates rdtp->dynticks_nmi to let the
375 * RCU grace-period handling know that the CPU is active. 375 * RCU grace-period handling know that the CPU is active.
376 */ 376 */
377 void rcu_nmi_enter(void) 377 void rcu_nmi_enter(void)
378 { 378 {
379 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 379 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
380 380
381 if (rdtp->dynticks_nmi_nesting == 0 && 381 if (rdtp->dynticks_nmi_nesting == 0 &&
382 (atomic_read(&rdtp->dynticks) & 0x1)) 382 (atomic_read(&rdtp->dynticks) & 0x1))
383 return; 383 return;
384 rdtp->dynticks_nmi_nesting++; 384 rdtp->dynticks_nmi_nesting++;
385 smp_mb__before_atomic_inc(); /* Force delay from prior write. */ 385 smp_mb__before_atomic_inc(); /* Force delay from prior write. */
386 atomic_inc(&rdtp->dynticks); 386 atomic_inc(&rdtp->dynticks);
387 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 387 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
388 smp_mb__after_atomic_inc(); /* See above. */ 388 smp_mb__after_atomic_inc(); /* See above. */
389 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 389 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
390 } 390 }
391 391
392 /** 392 /**
393 * rcu_nmi_exit - inform RCU of exit from NMI context 393 * rcu_nmi_exit - inform RCU of exit from NMI context
394 * 394 *
395 * If the CPU was idle with dynamic ticks active, and there is no 395 * If the CPU was idle with dynamic ticks active, and there is no
396 * irq handler running, this updates rdtp->dynticks_nmi to let the 396 * irq handler running, this updates rdtp->dynticks_nmi to let the
397 * RCU grace-period handling know that the CPU is no longer active. 397 * RCU grace-period handling know that the CPU is no longer active.
398 */ 398 */
399 void rcu_nmi_exit(void) 399 void rcu_nmi_exit(void)
400 { 400 {
401 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 401 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
402 402
403 if (rdtp->dynticks_nmi_nesting == 0 || 403 if (rdtp->dynticks_nmi_nesting == 0 ||
404 --rdtp->dynticks_nmi_nesting != 0) 404 --rdtp->dynticks_nmi_nesting != 0)
405 return; 405 return;
406 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 406 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
407 smp_mb__before_atomic_inc(); /* See above. */ 407 smp_mb__before_atomic_inc(); /* See above. */
408 atomic_inc(&rdtp->dynticks); 408 atomic_inc(&rdtp->dynticks);
409 smp_mb__after_atomic_inc(); /* Force delay to next write. */ 409 smp_mb__after_atomic_inc(); /* Force delay to next write. */
410 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 410 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
411 } 411 }
412 412
413 /** 413 /**
414 * rcu_irq_enter - inform RCU of entry to hard irq context 414 * rcu_irq_enter - inform RCU of entry to hard irq context
415 * 415 *
416 * If the CPU was idle with dynamic ticks active, this updates the 416 * If the CPU was idle with dynamic ticks active, this updates the
417 * rdtp->dynticks to let the RCU handling know that the CPU is active. 417 * rdtp->dynticks to let the RCU handling know that the CPU is active.
418 */ 418 */
419 void rcu_irq_enter(void) 419 void rcu_irq_enter(void)
420 { 420 {
421 rcu_exit_nohz(); 421 rcu_exit_nohz();
422 } 422 }
423 423
424 /** 424 /**
425 * rcu_irq_exit - inform RCU of exit from hard irq context 425 * rcu_irq_exit - inform RCU of exit from hard irq context
426 * 426 *
427 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks 427 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
428 * to put let the RCU handling be aware that the CPU is going back to idle 428 * to put let the RCU handling be aware that the CPU is going back to idle
429 * with no ticks. 429 * with no ticks.
430 */ 430 */
431 void rcu_irq_exit(void) 431 void rcu_irq_exit(void)
432 { 432 {
433 rcu_enter_nohz(); 433 rcu_enter_nohz();
434 } 434 }
435 435
436 #ifdef CONFIG_SMP 436 #ifdef CONFIG_SMP
437 437
438 /* 438 /*
439 * Snapshot the specified CPU's dynticks counter so that we can later 439 * Snapshot the specified CPU's dynticks counter so that we can later
440 * credit them with an implicit quiescent state. Return 1 if this CPU 440 * credit them with an implicit quiescent state. Return 1 if this CPU
441 * is in dynticks idle mode, which is an extended quiescent state. 441 * is in dynticks idle mode, which is an extended quiescent state.
442 */ 442 */
443 static int dyntick_save_progress_counter(struct rcu_data *rdp) 443 static int dyntick_save_progress_counter(struct rcu_data *rdp)
444 { 444 {
445 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 445 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
446 return 0; 446 return 0;
447 } 447 }
448 448
449 /* 449 /*
450 * Return true if the specified CPU has passed through a quiescent 450 * Return true if the specified CPU has passed through a quiescent
451 * state by virtue of being in or having passed through an dynticks 451 * state by virtue of being in or having passed through an dynticks
452 * idle state since the last call to dyntick_save_progress_counter() 452 * idle state since the last call to dyntick_save_progress_counter()
453 * for this same CPU. 453 * for this same CPU.
454 */ 454 */
455 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 455 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
456 { 456 {
457 unsigned long curr; 457 unsigned long curr;
458 unsigned long snap; 458 unsigned long snap;
459 459
460 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); 460 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks);
461 snap = (unsigned long)rdp->dynticks_snap; 461 snap = (unsigned long)rdp->dynticks_snap;
462 462
463 /* 463 /*
464 * If the CPU passed through or entered a dynticks idle phase with 464 * If the CPU passed through or entered a dynticks idle phase with
465 * no active irq/NMI handlers, then we can safely pretend that the CPU 465 * no active irq/NMI handlers, then we can safely pretend that the CPU
466 * already acknowledged the request to pass through a quiescent 466 * already acknowledged the request to pass through a quiescent
467 * state. Either way, that CPU cannot possibly be in an RCU 467 * state. Either way, that CPU cannot possibly be in an RCU
468 * read-side critical section that started before the beginning 468 * read-side critical section that started before the beginning
469 * of the current RCU grace period. 469 * of the current RCU grace period.
470 */ 470 */
471 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { 471 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) {
472 rdp->dynticks_fqs++; 472 rdp->dynticks_fqs++;
473 return 1; 473 return 1;
474 } 474 }
475 475
476 /* Go check for the CPU being offline. */ 476 /* Go check for the CPU being offline. */
477 return rcu_implicit_offline_qs(rdp); 477 return rcu_implicit_offline_qs(rdp);
478 } 478 }
479 479
480 #endif /* #ifdef CONFIG_SMP */ 480 #endif /* #ifdef CONFIG_SMP */
481 481
482 #else /* #ifdef CONFIG_NO_HZ */ 482 #else /* #ifdef CONFIG_NO_HZ */
483 483
484 #ifdef CONFIG_SMP 484 #ifdef CONFIG_SMP
485 485
486 static int dyntick_save_progress_counter(struct rcu_data *rdp) 486 static int dyntick_save_progress_counter(struct rcu_data *rdp)
487 { 487 {
488 return 0; 488 return 0;
489 } 489 }
490 490
491 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 491 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
492 { 492 {
493 return rcu_implicit_offline_qs(rdp); 493 return rcu_implicit_offline_qs(rdp);
494 } 494 }
495 495
496 #endif /* #ifdef CONFIG_SMP */ 496 #endif /* #ifdef CONFIG_SMP */
497 497
498 #endif /* #else #ifdef CONFIG_NO_HZ */ 498 #endif /* #else #ifdef CONFIG_NO_HZ */
499 499
500 int rcu_cpu_stall_suppress __read_mostly; 500 int rcu_cpu_stall_suppress __read_mostly;
501 501
502 static void record_gp_stall_check_time(struct rcu_state *rsp) 502 static void record_gp_stall_check_time(struct rcu_state *rsp)
503 { 503 {
504 rsp->gp_start = jiffies; 504 rsp->gp_start = jiffies;
505 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; 505 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
506 } 506 }
507 507
508 static void print_other_cpu_stall(struct rcu_state *rsp) 508 static void print_other_cpu_stall(struct rcu_state *rsp)
509 { 509 {
510 int cpu; 510 int cpu;
511 long delta; 511 long delta;
512 unsigned long flags; 512 unsigned long flags;
513 struct rcu_node *rnp = rcu_get_root(rsp); 513 struct rcu_node *rnp = rcu_get_root(rsp);
514 514
515 /* Only let one CPU complain about others per time interval. */ 515 /* Only let one CPU complain about others per time interval. */
516 516
517 raw_spin_lock_irqsave(&rnp->lock, flags); 517 raw_spin_lock_irqsave(&rnp->lock, flags);
518 delta = jiffies - rsp->jiffies_stall; 518 delta = jiffies - rsp->jiffies_stall;
519 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 519 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
520 raw_spin_unlock_irqrestore(&rnp->lock, flags); 520 raw_spin_unlock_irqrestore(&rnp->lock, flags);
521 return; 521 return;
522 } 522 }
523 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 523 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
524 524
525 /* 525 /*
526 * Now rat on any tasks that got kicked up to the root rcu_node 526 * Now rat on any tasks that got kicked up to the root rcu_node
527 * due to CPU offlining. 527 * due to CPU offlining.
528 */ 528 */
529 rcu_print_task_stall(rnp); 529 rcu_print_task_stall(rnp);
530 raw_spin_unlock_irqrestore(&rnp->lock, flags); 530 raw_spin_unlock_irqrestore(&rnp->lock, flags);
531 531
532 /* 532 /*
533 * OK, time to rat on our buddy... 533 * OK, time to rat on our buddy...
534 * See Documentation/RCU/stallwarn.txt for info on how to debug 534 * See Documentation/RCU/stallwarn.txt for info on how to debug
535 * RCU CPU stall warnings. 535 * RCU CPU stall warnings.
536 */ 536 */
537 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", 537 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
538 rsp->name); 538 rsp->name);
539 rcu_for_each_leaf_node(rsp, rnp) { 539 rcu_for_each_leaf_node(rsp, rnp) {
540 raw_spin_lock_irqsave(&rnp->lock, flags); 540 raw_spin_lock_irqsave(&rnp->lock, flags);
541 rcu_print_task_stall(rnp); 541 rcu_print_task_stall(rnp);
542 raw_spin_unlock_irqrestore(&rnp->lock, flags); 542 raw_spin_unlock_irqrestore(&rnp->lock, flags);
543 if (rnp->qsmask == 0) 543 if (rnp->qsmask == 0)
544 continue; 544 continue;
545 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 545 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
546 if (rnp->qsmask & (1UL << cpu)) 546 if (rnp->qsmask & (1UL << cpu))
547 printk(" %d", rnp->grplo + cpu); 547 printk(" %d", rnp->grplo + cpu);
548 } 548 }
549 printk("} (detected by %d, t=%ld jiffies)\n", 549 printk("} (detected by %d, t=%ld jiffies)\n",
550 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 550 smp_processor_id(), (long)(jiffies - rsp->gp_start));
551 trigger_all_cpu_backtrace(); 551 trigger_all_cpu_backtrace();
552 552
553 /* If so configured, complain about tasks blocking the grace period. */ 553 /* If so configured, complain about tasks blocking the grace period. */
554 554
555 rcu_print_detail_task_stall(rsp); 555 rcu_print_detail_task_stall(rsp);
556 556
557 force_quiescent_state(rsp, 0); /* Kick them all. */ 557 force_quiescent_state(rsp, 0); /* Kick them all. */
558 } 558 }
559 559
560 static void print_cpu_stall(struct rcu_state *rsp) 560 static void print_cpu_stall(struct rcu_state *rsp)
561 { 561 {
562 unsigned long flags; 562 unsigned long flags;
563 struct rcu_node *rnp = rcu_get_root(rsp); 563 struct rcu_node *rnp = rcu_get_root(rsp);
564 564
565 /* 565 /*
566 * OK, time to rat on ourselves... 566 * OK, time to rat on ourselves...
567 * See Documentation/RCU/stallwarn.txt for info on how to debug 567 * See Documentation/RCU/stallwarn.txt for info on how to debug
568 * RCU CPU stall warnings. 568 * RCU CPU stall warnings.
569 */ 569 */
570 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 570 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
571 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 571 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
572 trigger_all_cpu_backtrace(); 572 trigger_all_cpu_backtrace();
573 573
574 raw_spin_lock_irqsave(&rnp->lock, flags); 574 raw_spin_lock_irqsave(&rnp->lock, flags);
575 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 575 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
576 rsp->jiffies_stall = 576 rsp->jiffies_stall =
577 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 577 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
578 raw_spin_unlock_irqrestore(&rnp->lock, flags); 578 raw_spin_unlock_irqrestore(&rnp->lock, flags);
579 579
580 set_need_resched(); /* kick ourselves to get things going. */ 580 set_need_resched(); /* kick ourselves to get things going. */
581 } 581 }
582 582
583 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 583 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
584 { 584 {
585 unsigned long j; 585 unsigned long j;
586 unsigned long js; 586 unsigned long js;
587 struct rcu_node *rnp; 587 struct rcu_node *rnp;
588 588
589 if (rcu_cpu_stall_suppress) 589 if (rcu_cpu_stall_suppress)
590 return; 590 return;
591 j = ACCESS_ONCE(jiffies); 591 j = ACCESS_ONCE(jiffies);
592 js = ACCESS_ONCE(rsp->jiffies_stall); 592 js = ACCESS_ONCE(rsp->jiffies_stall);
593 rnp = rdp->mynode; 593 rnp = rdp->mynode;
594 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { 594 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
595 595
596 /* We haven't checked in, so go dump stack. */ 596 /* We haven't checked in, so go dump stack. */
597 print_cpu_stall(rsp); 597 print_cpu_stall(rsp);
598 598
599 } else if (rcu_gp_in_progress(rsp) && 599 } else if (rcu_gp_in_progress(rsp) &&
600 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 600 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
601 601
602 /* They had a few time units to dump stack, so complain. */ 602 /* They had a few time units to dump stack, so complain. */
603 print_other_cpu_stall(rsp); 603 print_other_cpu_stall(rsp);
604 } 604 }
605 } 605 }
606 606
607 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 607 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
608 { 608 {
609 rcu_cpu_stall_suppress = 1; 609 rcu_cpu_stall_suppress = 1;
610 return NOTIFY_DONE; 610 return NOTIFY_DONE;
611 } 611 }
612 612
613 /** 613 /**
614 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 614 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
615 * 615 *
616 * Set the stall-warning timeout way off into the future, thus preventing 616 * Set the stall-warning timeout way off into the future, thus preventing
617 * any RCU CPU stall-warning messages from appearing in the current set of 617 * any RCU CPU stall-warning messages from appearing in the current set of
618 * RCU grace periods. 618 * RCU grace periods.
619 * 619 *
620 * The caller must disable hard irqs. 620 * The caller must disable hard irqs.
621 */ 621 */
622 void rcu_cpu_stall_reset(void) 622 void rcu_cpu_stall_reset(void)
623 { 623 {
624 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; 624 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
625 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; 625 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
626 rcu_preempt_stall_reset(); 626 rcu_preempt_stall_reset();
627 } 627 }
628 628
629 static struct notifier_block rcu_panic_block = { 629 static struct notifier_block rcu_panic_block = {
630 .notifier_call = rcu_panic, 630 .notifier_call = rcu_panic,
631 }; 631 };
632 632
633 static void __init check_cpu_stall_init(void) 633 static void __init check_cpu_stall_init(void)
634 { 634 {
635 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 635 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
636 } 636 }
637 637
638 /* 638 /*
639 * Update CPU-local rcu_data state to record the newly noticed grace period. 639 * Update CPU-local rcu_data state to record the newly noticed grace period.
640 * This is used both when we started the grace period and when we notice 640 * This is used both when we started the grace period and when we notice
641 * that someone else started the grace period. The caller must hold the 641 * that someone else started the grace period. The caller must hold the
642 * ->lock of the leaf rcu_node structure corresponding to the current CPU, 642 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
643 * and must have irqs disabled. 643 * and must have irqs disabled.
644 */ 644 */
645 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 645 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
646 { 646 {
647 if (rdp->gpnum != rnp->gpnum) { 647 if (rdp->gpnum != rnp->gpnum) {
648 /* 648 /*
649 * If the current grace period is waiting for this CPU, 649 * If the current grace period is waiting for this CPU,
650 * set up to detect a quiescent state, otherwise don't 650 * set up to detect a quiescent state, otherwise don't
651 * go looking for one. 651 * go looking for one.
652 */ 652 */
653 rdp->gpnum = rnp->gpnum; 653 rdp->gpnum = rnp->gpnum;
654 if (rnp->qsmask & rdp->grpmask) { 654 if (rnp->qsmask & rdp->grpmask) {
655 rdp->qs_pending = 1; 655 rdp->qs_pending = 1;
656 rdp->passed_quiesc = 0; 656 rdp->passed_quiesc = 0;
657 } else 657 } else
658 rdp->qs_pending = 0; 658 rdp->qs_pending = 0;
659 } 659 }
660 } 660 }
661 661
662 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) 662 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
663 { 663 {
664 unsigned long flags; 664 unsigned long flags;
665 struct rcu_node *rnp; 665 struct rcu_node *rnp;
666 666
667 local_irq_save(flags); 667 local_irq_save(flags);
668 rnp = rdp->mynode; 668 rnp = rdp->mynode;
669 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ 669 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
670 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 670 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
671 local_irq_restore(flags); 671 local_irq_restore(flags);
672 return; 672 return;
673 } 673 }
674 __note_new_gpnum(rsp, rnp, rdp); 674 __note_new_gpnum(rsp, rnp, rdp);
675 raw_spin_unlock_irqrestore(&rnp->lock, flags); 675 raw_spin_unlock_irqrestore(&rnp->lock, flags);
676 } 676 }
677 677
678 /* 678 /*
679 * Did someone else start a new RCU grace period start since we last 679 * Did someone else start a new RCU grace period start since we last
680 * checked? Update local state appropriately if so. Must be called 680 * checked? Update local state appropriately if so. Must be called
681 * on the CPU corresponding to rdp. 681 * on the CPU corresponding to rdp.
682 */ 682 */
683 static int 683 static int
684 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) 684 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
685 { 685 {
686 unsigned long flags; 686 unsigned long flags;
687 int ret = 0; 687 int ret = 0;
688 688
689 local_irq_save(flags); 689 local_irq_save(flags);
690 if (rdp->gpnum != rsp->gpnum) { 690 if (rdp->gpnum != rsp->gpnum) {
691 note_new_gpnum(rsp, rdp); 691 note_new_gpnum(rsp, rdp);
692 ret = 1; 692 ret = 1;
693 } 693 }
694 local_irq_restore(flags); 694 local_irq_restore(flags);
695 return ret; 695 return ret;
696 } 696 }
697 697
698 /* 698 /*
699 * Advance this CPU's callbacks, but only if the current grace period 699 * Advance this CPU's callbacks, but only if the current grace period
700 * has ended. This may be called only from the CPU to whom the rdp 700 * has ended. This may be called only from the CPU to whom the rdp
701 * belongs. In addition, the corresponding leaf rcu_node structure's 701 * belongs. In addition, the corresponding leaf rcu_node structure's
702 * ->lock must be held by the caller, with irqs disabled. 702 * ->lock must be held by the caller, with irqs disabled.
703 */ 703 */
704 static void 704 static void
705 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 705 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
706 { 706 {
707 /* Did another grace period end? */ 707 /* Did another grace period end? */
708 if (rdp->completed != rnp->completed) { 708 if (rdp->completed != rnp->completed) {
709 709
710 /* Advance callbacks. No harm if list empty. */ 710 /* Advance callbacks. No harm if list empty. */
711 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 711 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
712 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 712 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
713 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 713 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
714 714
715 /* Remember that we saw this grace-period completion. */ 715 /* Remember that we saw this grace-period completion. */
716 rdp->completed = rnp->completed; 716 rdp->completed = rnp->completed;
717 717
718 /* 718 /*
719 * If we were in an extended quiescent state, we may have 719 * If we were in an extended quiescent state, we may have
720 * missed some grace periods that others CPUs handled on 720 * missed some grace periods that others CPUs handled on
721 * our behalf. Catch up with this state to avoid noting 721 * our behalf. Catch up with this state to avoid noting
722 * spurious new grace periods. If another grace period 722 * spurious new grace periods. If another grace period
723 * has started, then rnp->gpnum will have advanced, so 723 * has started, then rnp->gpnum will have advanced, so
724 * we will detect this later on. 724 * we will detect this later on.
725 */ 725 */
726 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) 726 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
727 rdp->gpnum = rdp->completed; 727 rdp->gpnum = rdp->completed;
728 728
729 /* 729 /*
730 * If RCU does not need a quiescent state from this CPU, 730 * If RCU does not need a quiescent state from this CPU,
731 * then make sure that this CPU doesn't go looking for one. 731 * then make sure that this CPU doesn't go looking for one.
732 */ 732 */
733 if ((rnp->qsmask & rdp->grpmask) == 0) 733 if ((rnp->qsmask & rdp->grpmask) == 0)
734 rdp->qs_pending = 0; 734 rdp->qs_pending = 0;
735 } 735 }
736 } 736 }
737 737
738 /* 738 /*
739 * Advance this CPU's callbacks, but only if the current grace period 739 * Advance this CPU's callbacks, but only if the current grace period
740 * has ended. This may be called only from the CPU to whom the rdp 740 * has ended. This may be called only from the CPU to whom the rdp
741 * belongs. 741 * belongs.
742 */ 742 */
743 static void 743 static void
744 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) 744 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
745 { 745 {
746 unsigned long flags; 746 unsigned long flags;
747 struct rcu_node *rnp; 747 struct rcu_node *rnp;
748 748
749 local_irq_save(flags); 749 local_irq_save(flags);
750 rnp = rdp->mynode; 750 rnp = rdp->mynode;
751 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ 751 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
752 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 752 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
753 local_irq_restore(flags); 753 local_irq_restore(flags);
754 return; 754 return;
755 } 755 }
756 __rcu_process_gp_end(rsp, rnp, rdp); 756 __rcu_process_gp_end(rsp, rnp, rdp);
757 raw_spin_unlock_irqrestore(&rnp->lock, flags); 757 raw_spin_unlock_irqrestore(&rnp->lock, flags);
758 } 758 }
759 759
760 /* 760 /*
761 * Do per-CPU grace-period initialization for running CPU. The caller 761 * Do per-CPU grace-period initialization for running CPU. The caller
762 * must hold the lock of the leaf rcu_node structure corresponding to 762 * must hold the lock of the leaf rcu_node structure corresponding to
763 * this CPU. 763 * this CPU.
764 */ 764 */
765 static void 765 static void
766 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 766 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
767 { 767 {
768 /* Prior grace period ended, so advance callbacks for current CPU. */ 768 /* Prior grace period ended, so advance callbacks for current CPU. */
769 __rcu_process_gp_end(rsp, rnp, rdp); 769 __rcu_process_gp_end(rsp, rnp, rdp);
770 770
771 /* 771 /*
772 * Because this CPU just now started the new grace period, we know 772 * Because this CPU just now started the new grace period, we know
773 * that all of its callbacks will be covered by this upcoming grace 773 * that all of its callbacks will be covered by this upcoming grace
774 * period, even the ones that were registered arbitrarily recently. 774 * period, even the ones that were registered arbitrarily recently.
775 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. 775 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
776 * 776 *
777 * Other CPUs cannot be sure exactly when the grace period started. 777 * Other CPUs cannot be sure exactly when the grace period started.
778 * Therefore, their recently registered callbacks must pass through 778 * Therefore, their recently registered callbacks must pass through
779 * an additional RCU_NEXT_READY stage, so that they will be handled 779 * an additional RCU_NEXT_READY stage, so that they will be handled
780 * by the next RCU grace period. 780 * by the next RCU grace period.
781 */ 781 */
782 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 782 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
783 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 783 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
784 784
785 /* Set state so that this CPU will detect the next quiescent state. */ 785 /* Set state so that this CPU will detect the next quiescent state. */
786 __note_new_gpnum(rsp, rnp, rdp); 786 __note_new_gpnum(rsp, rnp, rdp);
787 } 787 }
788 788
789 /* 789 /*
790 * Start a new RCU grace period if warranted, re-initializing the hierarchy 790 * Start a new RCU grace period if warranted, re-initializing the hierarchy
791 * in preparation for detecting the next grace period. The caller must hold 791 * in preparation for detecting the next grace period. The caller must hold
792 * the root node's ->lock, which is released before return. Hard irqs must 792 * the root node's ->lock, which is released before return. Hard irqs must
793 * be disabled. 793 * be disabled.
794 */ 794 */
795 static void 795 static void
796 rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 796 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
797 __releases(rcu_get_root(rsp)->lock) 797 __releases(rcu_get_root(rsp)->lock)
798 { 798 {
799 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 799 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
800 struct rcu_node *rnp = rcu_get_root(rsp); 800 struct rcu_node *rnp = rcu_get_root(rsp);
801 801
802 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { 802 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
803 if (cpu_needs_another_gp(rsp, rdp)) 803 if (cpu_needs_another_gp(rsp, rdp))
804 rsp->fqs_need_gp = 1; 804 rsp->fqs_need_gp = 1;
805 if (rnp->completed == rsp->completed) { 805 if (rnp->completed == rsp->completed) {
806 raw_spin_unlock_irqrestore(&rnp->lock, flags); 806 raw_spin_unlock_irqrestore(&rnp->lock, flags);
807 return; 807 return;
808 } 808 }
809 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 809 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
810 810
811 /* 811 /*
812 * Propagate new ->completed value to rcu_node structures 812 * Propagate new ->completed value to rcu_node structures
813 * so that other CPUs don't have to wait until the start 813 * so that other CPUs don't have to wait until the start
814 * of the next grace period to process their callbacks. 814 * of the next grace period to process their callbacks.
815 */ 815 */
816 rcu_for_each_node_breadth_first(rsp, rnp) { 816 rcu_for_each_node_breadth_first(rsp, rnp) {
817 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 817 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
818 rnp->completed = rsp->completed; 818 rnp->completed = rsp->completed;
819 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 819 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
820 } 820 }
821 local_irq_restore(flags); 821 local_irq_restore(flags);
822 return; 822 return;
823 } 823 }
824 824
825 /* Advance to a new grace period and initialize state. */ 825 /* Advance to a new grace period and initialize state. */
826 rsp->gpnum++; 826 rsp->gpnum++;
827 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 827 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
828 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 828 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
829 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 829 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
830 record_gp_stall_check_time(rsp); 830 record_gp_stall_check_time(rsp);
831 831
832 /* Special-case the common single-level case. */ 832 /* Special-case the common single-level case. */
833 if (NUM_RCU_NODES == 1) { 833 if (NUM_RCU_NODES == 1) {
834 rcu_preempt_check_blocked_tasks(rnp); 834 rcu_preempt_check_blocked_tasks(rnp);
835 rnp->qsmask = rnp->qsmaskinit; 835 rnp->qsmask = rnp->qsmaskinit;
836 rnp->gpnum = rsp->gpnum; 836 rnp->gpnum = rsp->gpnum;
837 rnp->completed = rsp->completed; 837 rnp->completed = rsp->completed;
838 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 838 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
839 rcu_start_gp_per_cpu(rsp, rnp, rdp); 839 rcu_start_gp_per_cpu(rsp, rnp, rdp);
840 rcu_preempt_boost_start_gp(rnp); 840 rcu_preempt_boost_start_gp(rnp);
841 raw_spin_unlock_irqrestore(&rnp->lock, flags); 841 raw_spin_unlock_irqrestore(&rnp->lock, flags);
842 return; 842 return;
843 } 843 }
844 844
845 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ 845 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
846 846
847 847
848 /* Exclude any concurrent CPU-hotplug operations. */ 848 /* Exclude any concurrent CPU-hotplug operations. */
849 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 849 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
850 850
851 /* 851 /*
852 * Set the quiescent-state-needed bits in all the rcu_node 852 * Set the quiescent-state-needed bits in all the rcu_node
853 * structures for all currently online CPUs in breadth-first 853 * structures for all currently online CPUs in breadth-first
854 * order, starting from the root rcu_node structure. This 854 * order, starting from the root rcu_node structure. This
855 * operation relies on the layout of the hierarchy within the 855 * operation relies on the layout of the hierarchy within the
856 * rsp->node[] array. Note that other CPUs will access only 856 * rsp->node[] array. Note that other CPUs will access only
857 * the leaves of the hierarchy, which still indicate that no 857 * the leaves of the hierarchy, which still indicate that no
858 * grace period is in progress, at least until the corresponding 858 * grace period is in progress, at least until the corresponding
859 * leaf node has been initialized. In addition, we have excluded 859 * leaf node has been initialized. In addition, we have excluded
860 * CPU-hotplug operations. 860 * CPU-hotplug operations.
861 * 861 *
862 * Note that the grace period cannot complete until we finish 862 * Note that the grace period cannot complete until we finish
863 * the initialization process, as there will be at least one 863 * the initialization process, as there will be at least one
864 * qsmask bit set in the root node until that time, namely the 864 * qsmask bit set in the root node until that time, namely the
865 * one corresponding to this CPU, due to the fact that we have 865 * one corresponding to this CPU, due to the fact that we have
866 * irqs disabled. 866 * irqs disabled.
867 */ 867 */
868 rcu_for_each_node_breadth_first(rsp, rnp) { 868 rcu_for_each_node_breadth_first(rsp, rnp) {
869 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 869 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
870 rcu_preempt_check_blocked_tasks(rnp); 870 rcu_preempt_check_blocked_tasks(rnp);
871 rnp->qsmask = rnp->qsmaskinit; 871 rnp->qsmask = rnp->qsmaskinit;
872 rnp->gpnum = rsp->gpnum; 872 rnp->gpnum = rsp->gpnum;
873 rnp->completed = rsp->completed; 873 rnp->completed = rsp->completed;
874 if (rnp == rdp->mynode) 874 if (rnp == rdp->mynode)
875 rcu_start_gp_per_cpu(rsp, rnp, rdp); 875 rcu_start_gp_per_cpu(rsp, rnp, rdp);
876 rcu_preempt_boost_start_gp(rnp); 876 rcu_preempt_boost_start_gp(rnp);
877 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 877 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
878 } 878 }
879 879
880 rnp = rcu_get_root(rsp); 880 rnp = rcu_get_root(rsp);
881 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 881 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
882 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ 882 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
883 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 883 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
884 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 884 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
885 } 885 }
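
The block comment above relies on the breadth-first layout of rsp->node[]: the array stores the root first and then each successive level, so visiting parents before children reduces to walking the array in index order. A minimal sketch of that idea, using a toy node array rather than the kernel's rcu_for_each_node_breadth_first() macro:

    /* Toy model only: a breadth-first pass over a level-ordered array
     * reduces to a linear scan, so the root's state is published before
     * any leaf advertises the new grace period to its CPUs. */
    #include <stddef.h>

    #define TOY_NODE_COUNT 5                /* hypothetical: 1 root + 4 leaves */

    struct toy_node {
            unsigned long qsmask;           /* CPUs/groups still owing a QS */
            unsigned long qsmaskinit;       /* online CPUs/groups below here */
    };

    static struct toy_node toy_nodes[TOY_NODE_COUNT];  /* [0] is the root */

    static void toy_start_gp(void)
    {
            size_t i;

            for (i = 0; i < TOY_NODE_COUNT; i++)
                    toy_nodes[i].qsmask = toy_nodes[i].qsmaskinit;
    }
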
886 886
887 /* 887 /*
888 * Report a full set of quiescent states to the specified rcu_state 888 * Report a full set of quiescent states to the specified rcu_state
889 * data structure. This involves cleaning up after the prior grace 889 * data structure. This involves cleaning up after the prior grace
890 * period and letting rcu_start_gp() start up the next grace period 890 * period and letting rcu_start_gp() start up the next grace period
891 * if one is needed. Note that the caller must hold rnp->lock, as 891 * if one is needed. Note that the caller must hold rnp->lock, as
892 * required by rcu_start_gp(), which will release it. 892 * required by rcu_start_gp(), which will release it.
893 */ 893 */
894 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 894 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
895 __releases(rcu_get_root(rsp)->lock) 895 __releases(rcu_get_root(rsp)->lock)
896 { 896 {
897 unsigned long gp_duration; 897 unsigned long gp_duration;
898 898
899 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 899 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
900 900
901 /* 901 /*
902 * Ensure that all grace-period and pre-grace-period activity 902 * Ensure that all grace-period and pre-grace-period activity
903 * is seen before the assignment to rsp->completed. 903 * is seen before the assignment to rsp->completed.
904 */ 904 */
905 smp_mb(); /* See above block comment. */ 905 smp_mb(); /* See above block comment. */
906 gp_duration = jiffies - rsp->gp_start; 906 gp_duration = jiffies - rsp->gp_start;
907 if (gp_duration > rsp->gp_max) 907 if (gp_duration > rsp->gp_max)
908 rsp->gp_max = gp_duration; 908 rsp->gp_max = gp_duration;
909 rsp->completed = rsp->gpnum; 909 rsp->completed = rsp->gpnum;
910 rsp->signaled = RCU_GP_IDLE; 910 rsp->signaled = RCU_GP_IDLE;
911 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 911 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
912 } 912 }
913 913
914 /* 914 /*
915 * Similar to rcu_report_qs_rdp(), for which it is a helper function. 915 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
916 * Allows quiescent states for a group of CPUs to be reported at one go 916 * Allows quiescent states for a group of CPUs to be reported at one go
917 * to the specified rcu_node structure, though all the CPUs in the group 917 * to the specified rcu_node structure, though all the CPUs in the group
918 * must be represented by the same rcu_node structure (which need not be 918 * must be represented by the same rcu_node structure (which need not be
919 * a leaf rcu_node structure, though it often will be). That structure's 919 * a leaf rcu_node structure, though it often will be). That structure's
920 * lock must be held upon entry, and it is released before return. 920 * lock must be held upon entry, and it is released before return.
921 */ 921 */
922 static void 922 static void
923 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 923 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
924 struct rcu_node *rnp, unsigned long flags) 924 struct rcu_node *rnp, unsigned long flags)
925 __releases(rnp->lock) 925 __releases(rnp->lock)
926 { 926 {
927 struct rcu_node *rnp_c; 927 struct rcu_node *rnp_c;
928 928
929 /* Walk up the rcu_node hierarchy. */ 929 /* Walk up the rcu_node hierarchy. */
930 for (;;) { 930 for (;;) {
931 if (!(rnp->qsmask & mask)) { 931 if (!(rnp->qsmask & mask)) {
932 932
933 /* Our bit has already been cleared, so done. */ 933 /* Our bit has already been cleared, so done. */
934 raw_spin_unlock_irqrestore(&rnp->lock, flags); 934 raw_spin_unlock_irqrestore(&rnp->lock, flags);
935 return; 935 return;
936 } 936 }
937 rnp->qsmask &= ~mask; 937 rnp->qsmask &= ~mask;
938 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 938 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
939 939
940 /* Other bits still set at this level, so done. */ 940 /* Other bits still set at this level, so done. */
941 raw_spin_unlock_irqrestore(&rnp->lock, flags); 941 raw_spin_unlock_irqrestore(&rnp->lock, flags);
942 return; 942 return;
943 } 943 }
944 mask = rnp->grpmask; 944 mask = rnp->grpmask;
945 if (rnp->parent == NULL) { 945 if (rnp->parent == NULL) {
946 946
947 /* No more levels. Exit loop holding root lock. */ 947 /* No more levels. Exit loop holding root lock. */
948 948
949 break; 949 break;
950 } 950 }
951 raw_spin_unlock_irqrestore(&rnp->lock, flags); 951 raw_spin_unlock_irqrestore(&rnp->lock, flags);
952 rnp_c = rnp; 952 rnp_c = rnp;
953 rnp = rnp->parent; 953 rnp = rnp->parent;
954 raw_spin_lock_irqsave(&rnp->lock, flags); 954 raw_spin_lock_irqsave(&rnp->lock, flags);
955 WARN_ON_ONCE(rnp_c->qsmask); 955 WARN_ON_ONCE(rnp_c->qsmask);
956 } 956 }
957 957
958 /* 958 /*
959 * Get here if we are the last CPU to pass through a quiescent 959 * Get here if we are the last CPU to pass through a quiescent
960 * state for this grace period. Invoke rcu_report_qs_rsp() 960 * state for this grace period. Invoke rcu_report_qs_rsp()
961 * to clean up and start the next grace period if one is needed. 961 * to clean up and start the next grace period if one is needed.
962 */ 962 */
963 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 963 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
964 } 964 }
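
rcu_report_qs_rnp() above clears the caller's bit at its rcu_node, and once a node's qsmask empties it clears that node's own grpmask bit in the parent, repeating toward the root. A toy model of that upward walk, deliberately ignoring locking and the blocked-preempted-readers check:

    /* Toy model of the upward quiescent-state walk; locking and the
     * rcu_preempt_blocked_readers_cgp() check are omitted. */
    #include <stdio.h>

    struct toy_node {
            unsigned long qsmask;        /* bits still owed a QS report */
            unsigned long grpmask;       /* this node's bit in its parent */
            struct toy_node *parent;     /* NULL at the root */
    };

    static void toy_report_qs(struct toy_node *rnp, unsigned long mask)
    {
            for (;;) {
                    rnp->qsmask &= ~mask;
                    if (rnp->qsmask != 0 || rnp->parent == NULL)
                            break;           /* others still pending, or at root */
                    mask = rnp->grpmask;     /* propagate one bit upward */
                    rnp = rnp->parent;
            }
            if (rnp->parent == NULL && rnp->qsmask == 0)
                    printf("last reporter: grace period may end\n");
    }
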
965 965
966 /* 966 /*
967 * Record a quiescent state for the specified CPU to that CPU's rcu_data 967 * Record a quiescent state for the specified CPU to that CPU's rcu_data
968 * structure. This must be either called from the specified CPU, or 968 * structure. This must be either called from the specified CPU, or
969 * called when the specified CPU is known to be offline (and when it is 969 * called when the specified CPU is known to be offline (and when it is
970 * also known that no other CPU is concurrently trying to help the offline 970 * also known that no other CPU is concurrently trying to help the offline
971 * CPU). The lastcomp argument is used to make sure we are still in the 971 * CPU). The lastcomp argument is used to make sure we are still in the
972 * grace period of interest. We don't want to end the current grace period 972 * grace period of interest. We don't want to end the current grace period
973 * based on quiescent states detected in an earlier grace period! 973 * based on quiescent states detected in an earlier grace period!
974 */ 974 */
975 static void 975 static void
976 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) 976 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
977 { 977 {
978 unsigned long flags; 978 unsigned long flags;
979 unsigned long mask; 979 unsigned long mask;
980 struct rcu_node *rnp; 980 struct rcu_node *rnp;
981 981
982 rnp = rdp->mynode; 982 rnp = rdp->mynode;
983 raw_spin_lock_irqsave(&rnp->lock, flags); 983 raw_spin_lock_irqsave(&rnp->lock, flags);
984 if (lastcomp != rnp->completed) { 984 if (lastcomp != rnp->completed) {
985 985
986 /* 986 /*
987 * Someone beat us to it for this grace period, so leave. 987 * Someone beat us to it for this grace period, so leave.
988 * The race with GP start is resolved by the fact that we 988 * The race with GP start is resolved by the fact that we
989 * hold the leaf rcu_node lock, so that the per-CPU bits 989 * hold the leaf rcu_node lock, so that the per-CPU bits
990 * cannot yet be initialized -- so we would simply find our 990 * cannot yet be initialized -- so we would simply find our
991 * CPU's bit already cleared in rcu_report_qs_rnp() if this 991 * CPU's bit already cleared in rcu_report_qs_rnp() if this
992 * race occurred. 992 * race occurred.
993 */ 993 */
994 rdp->passed_quiesc = 0; /* try again later! */ 994 rdp->passed_quiesc = 0; /* try again later! */
995 raw_spin_unlock_irqrestore(&rnp->lock, flags); 995 raw_spin_unlock_irqrestore(&rnp->lock, flags);
996 return; 996 return;
997 } 997 }
998 mask = rdp->grpmask; 998 mask = rdp->grpmask;
999 if ((rnp->qsmask & mask) == 0) { 999 if ((rnp->qsmask & mask) == 0) {
1000 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1000 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1001 } else { 1001 } else {
1002 rdp->qs_pending = 0; 1002 rdp->qs_pending = 0;
1003 1003
1004 /* 1004 /*
1005 * This GP can't end until cpu checks in, so all of our 1005 * This GP can't end until cpu checks in, so all of our
1006 * callbacks can be processed during the next GP. 1006 * callbacks can be processed during the next GP.
1007 */ 1007 */
1008 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1008 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1009 1009
1010 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1010 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1011 } 1011 }
1012 } 1012 }
1013 1013
1014 /* 1014 /*
1015 * Check to see if there is a new grace period of which this CPU 1015 * Check to see if there is a new grace period of which this CPU
1016 * is not yet aware, and if so, set up local rcu_data state for it. 1016 * is not yet aware, and if so, set up local rcu_data state for it.
1017 * Otherwise, see if this CPU has just passed through its first 1017 * Otherwise, see if this CPU has just passed through its first
1018 * quiescent state for this grace period, and record that fact if so. 1018 * quiescent state for this grace period, and record that fact if so.
1019 */ 1019 */
1020 static void 1020 static void
1021 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 1021 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1022 { 1022 {
1023 /* If there is now a new grace period, record and return. */ 1023 /* If there is now a new grace period, record and return. */
1024 if (check_for_new_grace_period(rsp, rdp)) 1024 if (check_for_new_grace_period(rsp, rdp))
1025 return; 1025 return;
1026 1026
1027 /* 1027 /*
1028 * Does this CPU still need to do its part for current grace period? 1028 * Does this CPU still need to do its part for current grace period?
1029 * If no, return and let the other CPUs do their part as well. 1029 * If no, return and let the other CPUs do their part as well.
1030 */ 1030 */
1031 if (!rdp->qs_pending) 1031 if (!rdp->qs_pending)
1032 return; 1032 return;
1033 1033
1034 /* 1034 /*
1035 * Was there a quiescent state since the beginning of the grace 1035 * Was there a quiescent state since the beginning of the grace
1036 * period? If no, then exit and wait for the next call. 1036 * period? If no, then exit and wait for the next call.
1037 */ 1037 */
1038 if (!rdp->passed_quiesc) 1038 if (!rdp->passed_quiesc)
1039 return; 1039 return;
1040 1040
1041 /* 1041 /*
1042 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 1042 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1043 * judge of that). 1043 * judge of that).
1044 */ 1044 */
1045 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); 1045 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
1046 } 1046 }
1047 1047
1048 #ifdef CONFIG_HOTPLUG_CPU 1048 #ifdef CONFIG_HOTPLUG_CPU
1049 1049
1050 /* 1050 /*
1051 * Move a dying CPU's RCU callbacks to online CPU's callback list. 1051 * Move a dying CPU's RCU callbacks to online CPU's callback list.
1052 * Synchronization is not required because this function executes 1052 * Synchronization is not required because this function executes
1053 * in stop_machine() context. 1053 * in stop_machine() context.
1054 */ 1054 */
1055 static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1055 static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1056 { 1056 {
1057 int i; 1057 int i;
1058 /* current DYING CPU is cleared in the cpu_online_mask */ 1058 /* current DYING CPU is cleared in the cpu_online_mask */
1059 int receive_cpu = cpumask_any(cpu_online_mask); 1059 int receive_cpu = cpumask_any(cpu_online_mask);
1060 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1060 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1061 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); 1061 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
1062 1062
1063 if (rdp->nxtlist == NULL) 1063 if (rdp->nxtlist == NULL)
1064 return; /* irqs disabled, so comparison is stable. */ 1064 return; /* irqs disabled, so comparison is stable. */
1065 1065
1066 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; 1066 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1067 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1067 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1068 receive_rdp->qlen += rdp->qlen; 1068 receive_rdp->qlen += rdp->qlen;
1069 receive_rdp->n_cbs_adopted += rdp->qlen; 1069 receive_rdp->n_cbs_adopted += rdp->qlen;
1070 rdp->n_cbs_orphaned += rdp->qlen; 1070 rdp->n_cbs_orphaned += rdp->qlen;
1071 1071
1072 rdp->nxtlist = NULL; 1072 rdp->nxtlist = NULL;
1073 for (i = 0; i < RCU_NEXT_SIZE; i++) 1073 for (i = 0; i < RCU_NEXT_SIZE; i++)
1074 rdp->nxttail[i] = &rdp->nxtlist; 1074 rdp->nxttail[i] = &rdp->nxtlist;
1075 rdp->qlen = 0; 1075 rdp->qlen = 0;
1076 } 1076 }
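
The splice above is O(1) because each callback list is managed through tail pointers: appending the dying CPU's entire ->nxtlist only touches the receiver's RCU_NEXT_TAIL pointer. A self-contained sketch of that idiom with toy types (the real code also fixes up the other per-segment tails and the queue-length counters):

    #include <stddef.h>

    struct toy_cb {
            struct toy_cb *next;
    };

    struct toy_cblist {
            struct toy_cb *head;
            struct toy_cb **tail;        /* &head when empty, else &last->next */
    };

    static void toy_cblist_init(struct toy_cblist *l)
    {
            l->head = NULL;
            l->tail = &l->head;
    }

    /* Append every callback queued on "from" to the end of "to". */
    static void toy_cblist_splice(struct toy_cblist *to, struct toy_cblist *from)
    {
            if (from->head == NULL)
                    return;
            *to->tail = from->head;
            to->tail = from->tail;
            toy_cblist_init(from);       /* donor list is now empty */
    }
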
1077 1077
1078 /* 1078 /*
1079 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1079 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1080 * and move all callbacks from the outgoing CPU to the current one. 1080 * and move all callbacks from the outgoing CPU to the current one.
1081 * There can only be one CPU hotplug operation at a time, so no other 1081 * There can only be one CPU hotplug operation at a time, so no other
1082 * CPU can be attempting to update rcu_cpu_kthread_task. 1082 * CPU can be attempting to update rcu_cpu_kthread_task.
1083 */ 1083 */
1084 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1084 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1085 { 1085 {
1086 unsigned long flags; 1086 unsigned long flags;
1087 unsigned long mask; 1087 unsigned long mask;
1088 int need_report = 0; 1088 int need_report = 0;
1089 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1089 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1090 struct rcu_node *rnp; 1090 struct rcu_node *rnp;
1091 struct task_struct *t; 1091 struct task_struct *t;
1092 1092
1093 /* Stop the CPU's kthread. */ 1093 /* Stop the CPU's kthread. */
1094 t = per_cpu(rcu_cpu_kthread_task, cpu); 1094 t = per_cpu(rcu_cpu_kthread_task, cpu);
1095 if (t != NULL) { 1095 if (t != NULL) {
1096 per_cpu(rcu_cpu_kthread_task, cpu) = NULL; 1096 per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
1097 kthread_stop(t); 1097 kthread_stop(t);
1098 } 1098 }
1099 1099
1100 /* Exclude any attempts to start a new grace period. */ 1100 /* Exclude any attempts to start a new grace period. */
1101 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1101 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1102 1102
1103 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 1103 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1104 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 1104 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
1105 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1105 mask = rdp->grpmask; /* rnp->grplo is constant. */
1106 do { 1106 do {
1107 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1107 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1108 rnp->qsmaskinit &= ~mask; 1108 rnp->qsmaskinit &= ~mask;
1109 if (rnp->qsmaskinit != 0) { 1109 if (rnp->qsmaskinit != 0) {
1110 if (rnp != rdp->mynode) 1110 if (rnp != rdp->mynode)
1111 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1111 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1112 break; 1112 break;
1113 } 1113 }
1114 if (rnp == rdp->mynode) 1114 if (rnp == rdp->mynode)
1115 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1115 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1116 else 1116 else
1117 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1117 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1118 mask = rnp->grpmask; 1118 mask = rnp->grpmask;
1119 rnp = rnp->parent; 1119 rnp = rnp->parent;
1120 } while (rnp != NULL); 1120 } while (rnp != NULL);
1121 1121
1122 /* 1122 /*
1123 * We still hold the leaf rcu_node structure lock here, and 1123 * We still hold the leaf rcu_node structure lock here, and
1124 * irqs are still disabled. The reason for this subterfuge is 1124 * irqs are still disabled. The reason for this subterfuge is
1125 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock 1125 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
1126 * held leads to deadlock. 1126 * held leads to deadlock.
1127 */ 1127 */
1128 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1128 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1129 rnp = rdp->mynode; 1129 rnp = rdp->mynode;
1130 if (need_report & RCU_OFL_TASKS_NORM_GP) 1130 if (need_report & RCU_OFL_TASKS_NORM_GP)
1131 rcu_report_unblock_qs_rnp(rnp, flags); 1131 rcu_report_unblock_qs_rnp(rnp, flags);
1132 else 1132 else
1133 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1133 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1134 if (need_report & RCU_OFL_TASKS_EXP_GP) 1134 if (need_report & RCU_OFL_TASKS_EXP_GP)
1135 rcu_report_exp_rnp(rsp, rnp); 1135 rcu_report_exp_rnp(rsp, rnp);
1136 1136 rcu_node_kthread_setaffinity(rnp, -1);
1137 /*
1138 * If there are no more online CPUs for this rcu_node structure,
1139 * kill the rcu_node structure's kthread. Otherwise, adjust its
1140 * affinity.
1141 */
1142 t = rnp->node_kthread_task;
1143 if (t != NULL &&
1144 rnp->qsmaskinit == 0) {
1145 raw_spin_lock_irqsave(&rnp->lock, flags);
1146 rnp->node_kthread_task = NULL;
1147 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1148 kthread_stop(t);
1149 rcu_stop_boost_kthread(rnp);
1150 } else
1151 rcu_node_kthread_setaffinity(rnp, -1);
1152 } 1137 }
1153 1138
1154 /* 1139 /*
1155 * Remove the specified CPU from the RCU hierarchy and move any pending 1140 * Remove the specified CPU from the RCU hierarchy and move any pending
1156 * callbacks that it might have to the current CPU. This code assumes 1141 * callbacks that it might have to the current CPU. This code assumes
1157 * that at least one CPU in the system will remain running at all times. 1142 * that at least one CPU in the system will remain running at all times.
1158 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. 1143 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
1159 */ 1144 */
1160 static void rcu_offline_cpu(int cpu) 1145 static void rcu_offline_cpu(int cpu)
1161 { 1146 {
1162 __rcu_offline_cpu(cpu, &rcu_sched_state); 1147 __rcu_offline_cpu(cpu, &rcu_sched_state);
1163 __rcu_offline_cpu(cpu, &rcu_bh_state); 1148 __rcu_offline_cpu(cpu, &rcu_bh_state);
1164 rcu_preempt_offline_cpu(cpu); 1149 rcu_preempt_offline_cpu(cpu);
1165 } 1150 }
1166 1151
1167 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 1152 #else /* #ifdef CONFIG_HOTPLUG_CPU */
1168 1153
1169 static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1154 static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1170 { 1155 {
1171 } 1156 }
1172 1157
1173 static void rcu_offline_cpu(int cpu) 1158 static void rcu_offline_cpu(int cpu)
1174 { 1159 {
1175 } 1160 }
1176 1161
1177 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ 1162 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1178 1163
1179 /* 1164 /*
1180 * Invoke any RCU callbacks that have made it to the end of their grace 1165 * Invoke any RCU callbacks that have made it to the end of their grace
1181 * period. Throttle as specified by rdp->blimit. 1166 * period. Throttle as specified by rdp->blimit.
1182 */ 1167 */
1183 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 1168 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1184 { 1169 {
1185 unsigned long flags; 1170 unsigned long flags;
1186 struct rcu_head *next, *list, **tail; 1171 struct rcu_head *next, *list, **tail;
1187 int count; 1172 int count;
1188 1173
1189 /* If no callbacks are ready, just return. */ 1174 /* If no callbacks are ready, just return. */
1190 if (!cpu_has_callbacks_ready_to_invoke(rdp)) 1175 if (!cpu_has_callbacks_ready_to_invoke(rdp))
1191 return; 1176 return;
1192 1177
1193 /* 1178 /*
1194 * Extract the list of ready callbacks, disabling to prevent 1179 * Extract the list of ready callbacks, disabling to prevent
1195 * races with call_rcu() from interrupt handlers. 1180 * races with call_rcu() from interrupt handlers.
1196 */ 1181 */
1197 local_irq_save(flags); 1182 local_irq_save(flags);
1198 list = rdp->nxtlist; 1183 list = rdp->nxtlist;
1199 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1184 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1200 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1185 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1201 tail = rdp->nxttail[RCU_DONE_TAIL]; 1186 tail = rdp->nxttail[RCU_DONE_TAIL];
1202 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) 1187 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
1203 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) 1188 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
1204 rdp->nxttail[count] = &rdp->nxtlist; 1189 rdp->nxttail[count] = &rdp->nxtlist;
1205 local_irq_restore(flags); 1190 local_irq_restore(flags);
1206 1191
1207 /* Invoke callbacks. */ 1192 /* Invoke callbacks. */
1208 count = 0; 1193 count = 0;
1209 while (list) { 1194 while (list) {
1210 next = list->next; 1195 next = list->next;
1211 prefetch(next); 1196 prefetch(next);
1212 debug_rcu_head_unqueue(list); 1197 debug_rcu_head_unqueue(list);
1213 __rcu_reclaim(list); 1198 __rcu_reclaim(list);
1214 list = next; 1199 list = next;
1215 if (++count >= rdp->blimit) 1200 if (++count >= rdp->blimit)
1216 break; 1201 break;
1217 } 1202 }
1218 1203
1219 local_irq_save(flags); 1204 local_irq_save(flags);
1220 1205
1221 /* Update count, and requeue any remaining callbacks. */ 1206 /* Update count, and requeue any remaining callbacks. */
1222 rdp->qlen -= count; 1207 rdp->qlen -= count;
1223 rdp->n_cbs_invoked += count; 1208 rdp->n_cbs_invoked += count;
1224 if (list != NULL) { 1209 if (list != NULL) {
1225 *tail = rdp->nxtlist; 1210 *tail = rdp->nxtlist;
1226 rdp->nxtlist = list; 1211 rdp->nxtlist = list;
1227 for (count = 0; count < RCU_NEXT_SIZE; count++) 1212 for (count = 0; count < RCU_NEXT_SIZE; count++)
1228 if (&rdp->nxtlist == rdp->nxttail[count]) 1213 if (&rdp->nxtlist == rdp->nxttail[count])
1229 rdp->nxttail[count] = tail; 1214 rdp->nxttail[count] = tail;
1230 else 1215 else
1231 break; 1216 break;
1232 } 1217 }
1233 1218
1234 /* Reinstate batch limit if we have worked down the excess. */ 1219 /* Reinstate batch limit if we have worked down the excess. */
1235 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 1220 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1236 rdp->blimit = blimit; 1221 rdp->blimit = blimit;
1237 1222
1238 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 1223 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
1239 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 1224 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
1240 rdp->qlen_last_fqs_check = 0; 1225 rdp->qlen_last_fqs_check = 0;
1241 rdp->n_force_qs_snap = rsp->n_force_qs; 1226 rdp->n_force_qs_snap = rsp->n_force_qs;
1242 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1227 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1243 rdp->qlen_last_fqs_check = rdp->qlen; 1228 rdp->qlen_last_fqs_check = rdp->qlen;
1244 1229
1245 local_irq_restore(flags); 1230 local_irq_restore(flags);
1246 1231
1247 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1232 /* Re-raise the RCU softirq if there are callbacks remaining. */
1248 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1233 if (cpu_has_callbacks_ready_to_invoke(rdp))
1249 invoke_rcu_cpu_kthread(); 1234 invoke_rcu_cpu_kthread();
1250 } 1235 }
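
The batch limit applied above keeps a single pass from monopolizing the CPU: at most rdp->blimit callbacks are invoked, and anything left over is requeued for the next pass. A stripped-down sketch of that throttle, using toy types rather than struct rcu_head:

    #include <stddef.h>

    struct toy_cb {
            struct toy_cb *next;
            void (*func)(struct toy_cb *);
    };

    /* Invoke at most "blimit" callbacks; return the unprocessed remainder. */
    static struct toy_cb *toy_do_batch(struct toy_cb *list, int blimit)
    {
            int count = 0;

            while (list != NULL && count < blimit) {
                    struct toy_cb *next = list->next;

                    list->func(list);
                    list = next;
                    count++;
            }
            return list;                 /* caller requeues these, if any */
    }
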
1251 1236
1252 /* 1237 /*
1253 * Check to see if this CPU is in a non-context-switch quiescent state 1238 * Check to see if this CPU is in a non-context-switch quiescent state
1254 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1239 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1255 * Also schedule the RCU softirq handler. 1240 * Also schedule the RCU softirq handler.
1256 * 1241 *
1257 * This function must be called with hardirqs disabled. It is normally 1242 * This function must be called with hardirqs disabled. It is normally
1258 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1243 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1259 * false, there is no point in invoking rcu_check_callbacks(). 1244 * false, there is no point in invoking rcu_check_callbacks().
1260 */ 1245 */
1261 void rcu_check_callbacks(int cpu, int user) 1246 void rcu_check_callbacks(int cpu, int user)
1262 { 1247 {
1263 if (user || 1248 if (user ||
1264 (idle_cpu(cpu) && rcu_scheduler_active && 1249 (idle_cpu(cpu) && rcu_scheduler_active &&
1265 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 1250 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
1266 1251
1267 /* 1252 /*
1268 * Get here if this CPU took its interrupt from user 1253 * Get here if this CPU took its interrupt from user
1269 * mode or from the idle loop, and if this is not a 1254 * mode or from the idle loop, and if this is not a
1270 * nested interrupt. In this case, the CPU is in 1255 * nested interrupt. In this case, the CPU is in
1271 * a quiescent state, so note it. 1256 * a quiescent state, so note it.
1272 * 1257 *
1273 * No memory barrier is required here because both 1258 * No memory barrier is required here because both
1274 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local 1259 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
1275 * variables that other CPUs neither access nor modify, 1260 * variables that other CPUs neither access nor modify,
1276 * at least not while the corresponding CPU is online. 1261 * at least not while the corresponding CPU is online.
1277 */ 1262 */
1278 1263
1279 rcu_sched_qs(cpu); 1264 rcu_sched_qs(cpu);
1280 rcu_bh_qs(cpu); 1265 rcu_bh_qs(cpu);
1281 1266
1282 } else if (!in_softirq()) { 1267 } else if (!in_softirq()) {
1283 1268
1284 /* 1269 /*
1285 * Get here if this CPU did not take its interrupt from 1270 * Get here if this CPU did not take its interrupt from
1286 * softirq, in other words, if it is not interrupting 1271 * softirq, in other words, if it is not interrupting
1287 * a rcu_bh read-side critical section. This is an _bh 1272 * a rcu_bh read-side critical section. This is an _bh
1288 * critical section, so note it. 1273 * critical section, so note it.
1289 */ 1274 */
1290 1275
1291 rcu_bh_qs(cpu); 1276 rcu_bh_qs(cpu);
1292 } 1277 }
1293 rcu_preempt_check_callbacks(cpu); 1278 rcu_preempt_check_callbacks(cpu);
1294 if (rcu_pending(cpu)) 1279 if (rcu_pending(cpu))
1295 invoke_rcu_cpu_kthread(); 1280 invoke_rcu_cpu_kthread();
1296 } 1281 }
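
The condition at the top of rcu_check_callbacks() can be read as a predicate: the tick counts as a quiescent state for rcu_sched when it interrupted user mode, or the idle loop while not inside softirq and not nested inside another hardirq (hardirq_count() <= (1 << HARDIRQ_SHIFT) means only the tick itself is on the stack). A hypothetical restatement with plain booleans, ignoring the rcu_scheduler_active guard:

    #include <stdbool.h>

    /* Hypothetical helper restating the check above; the kernel derives
     * these values from idle_cpu(), in_softirq() and hardirq_count(). */
    static bool toy_tick_is_sched_qs(bool user, bool cpu_was_idle,
                                     bool in_softirq, int hardirq_nesting)
    {
            return user ||
                   (cpu_was_idle && !in_softirq && hardirq_nesting <= 1);
    }
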
1297 1282
1298 #ifdef CONFIG_SMP 1283 #ifdef CONFIG_SMP
1299 1284
1300 /* 1285 /*
1301 * Scan the leaf rcu_node structures, processing dyntick state for any that 1286 * Scan the leaf rcu_node structures, processing dyntick state for any that
1302 * have not yet encountered a quiescent state, using the function specified. 1287 * have not yet encountered a quiescent state, using the function specified.
1303 * Also initiate boosting for any threads blocked on the root rcu_node. 1288 * Also initiate boosting for any threads blocked on the root rcu_node.
1304 * 1289 *
1305 * The caller must have suppressed start of new grace periods. 1290 * The caller must have suppressed start of new grace periods.
1306 */ 1291 */
1307 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1292 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1308 { 1293 {
1309 unsigned long bit; 1294 unsigned long bit;
1310 int cpu; 1295 int cpu;
1311 unsigned long flags; 1296 unsigned long flags;
1312 unsigned long mask; 1297 unsigned long mask;
1313 struct rcu_node *rnp; 1298 struct rcu_node *rnp;
1314 1299
1315 rcu_for_each_leaf_node(rsp, rnp) { 1300 rcu_for_each_leaf_node(rsp, rnp) {
1316 mask = 0; 1301 mask = 0;
1317 raw_spin_lock_irqsave(&rnp->lock, flags); 1302 raw_spin_lock_irqsave(&rnp->lock, flags);
1318 if (!rcu_gp_in_progress(rsp)) { 1303 if (!rcu_gp_in_progress(rsp)) {
1319 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1304 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1320 return; 1305 return;
1321 } 1306 }
1322 if (rnp->qsmask == 0) { 1307 if (rnp->qsmask == 0) {
1323 rcu_initiate_boost(rnp); 1308 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
1324 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1325 continue; 1309 continue;
1326 } 1310 }
1327 cpu = rnp->grplo; 1311 cpu = rnp->grplo;
1328 bit = 1; 1312 bit = 1;
1329 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1313 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1330 if ((rnp->qsmask & bit) != 0 && 1314 if ((rnp->qsmask & bit) != 0 &&
1331 f(per_cpu_ptr(rsp->rda, cpu))) 1315 f(per_cpu_ptr(rsp->rda, cpu)))
1332 mask |= bit; 1316 mask |= bit;
1333 } 1317 }
1334 if (mask != 0) { 1318 if (mask != 0) {
1335 1319
1336 /* rcu_report_qs_rnp() releases rnp->lock. */ 1320 /* rcu_report_qs_rnp() releases rnp->lock. */
1337 rcu_report_qs_rnp(mask, rsp, rnp, flags); 1321 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1338 continue; 1322 continue;
1339 } 1323 }
1340 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1324 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1341 } 1325 }
1342 rnp = rcu_get_root(rsp); 1326 rnp = rcu_get_root(rsp);
1343 raw_spin_lock_irqsave(&rnp->lock, flags); 1327 if (rnp->qsmask == 0) {
1344 if (rnp->qsmask == 0) 1328 raw_spin_lock_irqsave(&rnp->lock, flags);
1345 rcu_initiate_boost(rnp); 1329 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1346 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1330 }
1347 } 1331 }
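
force_qs_rnp() is driven in two passes through its function pointer f: dyntick_save_progress_counter() snapshots each laggard CPU's dyntick counter, and rcu_implicit_dynticks_qs() later checks whether that counter moved, which proves the CPU passed through dyntick-idle, an extended quiescent state. A simplified model of that snapshot-and-compare scheme; the real counters also track irq and NMI nesting:

    struct toy_dynticks {
            unsigned long counter;       /* bumped on dyntick-idle entry/exit */
            unsigned long snap;          /* value recorded by the first pass */
    };

    /* First pass: remember where the counter was. */
    static int toy_save_progress(struct toy_dynticks *d)
    {
            d->snap = d->counter;
            return 0;                    /* nothing proven yet */
    }

    /* Second pass: any movement means the CPU idled, i.e. a quiescent state. */
    static int toy_check_progress(const struct toy_dynticks *d)
    {
            return d->counter != d->snap;
    }
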
1348 1332
1349 /* 1333 /*
1350 * Force quiescent states on reluctant CPUs, and also detect which 1334 * Force quiescent states on reluctant CPUs, and also detect which
1351 * CPUs are in dyntick-idle mode. 1335 * CPUs are in dyntick-idle mode.
1352 */ 1336 */
1353 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1337 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1354 { 1338 {
1355 unsigned long flags; 1339 unsigned long flags;
1356 struct rcu_node *rnp = rcu_get_root(rsp); 1340 struct rcu_node *rnp = rcu_get_root(rsp);
1357 1341
1358 if (!rcu_gp_in_progress(rsp)) 1342 if (!rcu_gp_in_progress(rsp))
1359 return; /* No grace period in progress, nothing to force. */ 1343 return; /* No grace period in progress, nothing to force. */
1360 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { 1344 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1361 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1345 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1362 return; /* Someone else is already on the job. */ 1346 return; /* Someone else is already on the job. */
1363 } 1347 }
1364 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) 1348 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
1365 goto unlock_fqs_ret; /* no emergency and done recently. */ 1349 goto unlock_fqs_ret; /* no emergency and done recently. */
1366 rsp->n_force_qs++; 1350 rsp->n_force_qs++;
1367 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1351 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1368 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1352 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1369 if (!rcu_gp_in_progress(rsp)) { 1353 if (!rcu_gp_in_progress(rsp)) {
1370 rsp->n_force_qs_ngp++; 1354 rsp->n_force_qs_ngp++;
1371 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1355 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1372 goto unlock_fqs_ret; /* no GP in progress, time updated. */ 1356 goto unlock_fqs_ret; /* no GP in progress, time updated. */
1373 } 1357 }
1374 rsp->fqs_active = 1; 1358 rsp->fqs_active = 1;
1375 switch (rsp->signaled) { 1359 switch (rsp->signaled) {
1376 case RCU_GP_IDLE: 1360 case RCU_GP_IDLE:
1377 case RCU_GP_INIT: 1361 case RCU_GP_INIT:
1378 1362
1379 break; /* grace period idle or initializing, ignore. */ 1363 break; /* grace period idle or initializing, ignore. */
1380 1364
1381 case RCU_SAVE_DYNTICK: 1365 case RCU_SAVE_DYNTICK:
1382 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) 1366 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1383 break; /* So gcc recognizes the dead code. */ 1367 break; /* So gcc recognizes the dead code. */
1384 1368
1385 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1369 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1386 1370
1387 /* Record dyntick-idle state. */ 1371 /* Record dyntick-idle state. */
1388 force_qs_rnp(rsp, dyntick_save_progress_counter); 1372 force_qs_rnp(rsp, dyntick_save_progress_counter);
1389 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1373 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1390 if (rcu_gp_in_progress(rsp)) 1374 if (rcu_gp_in_progress(rsp))
1391 rsp->signaled = RCU_FORCE_QS; 1375 rsp->signaled = RCU_FORCE_QS;
1392 break; 1376 break;
1393 1377
1394 case RCU_FORCE_QS: 1378 case RCU_FORCE_QS:
1395 1379
1396 /* Check dyntick-idle state, send IPI to laggards. */ 1380 /* Check dyntick-idle state, send IPI to laggards. */
1397 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1381 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1398 force_qs_rnp(rsp, rcu_implicit_dynticks_qs); 1382 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
1399 1383
1400 /* Leave state in case more forcing is required. */ 1384 /* Leave state in case more forcing is required. */
1401 1385
1402 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1386 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1403 break; 1387 break;
1404 } 1388 }
1405 rsp->fqs_active = 0; 1389 rsp->fqs_active = 0;
1406 if (rsp->fqs_need_gp) { 1390 if (rsp->fqs_need_gp) {
1407 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ 1391 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1408 rsp->fqs_need_gp = 0; 1392 rsp->fqs_need_gp = 0;
1409 rcu_start_gp(rsp, flags); /* releases rnp->lock */ 1393 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1410 return; 1394 return;
1411 } 1395 }
1412 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1396 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1413 unlock_fqs_ret: 1397 unlock_fqs_ret:
1414 raw_spin_unlock_irqrestore(&rsp->fqslock, flags); 1398 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1415 } 1399 }
1416 1400
1417 #else /* #ifdef CONFIG_SMP */ 1401 #else /* #ifdef CONFIG_SMP */
1418 1402
1419 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1403 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1420 { 1404 {
1421 set_need_resched(); 1405 set_need_resched();
1422 } 1406 }
1423 1407
1424 #endif /* #else #ifdef CONFIG_SMP */ 1408 #endif /* #else #ifdef CONFIG_SMP */
1425 1409
1426 /* 1410 /*
1427 * This does the RCU processing work from softirq context for the 1411 * This does the RCU processing work from softirq context for the
1428 * specified rcu_state and rcu_data structures. This may be called 1412 * specified rcu_state and rcu_data structures. This may be called
1429 * only from the CPU to whom the rdp belongs. 1413 * only from the CPU to whom the rdp belongs.
1430 */ 1414 */
1431 static void 1415 static void
1432 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1416 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1433 { 1417 {
1434 unsigned long flags; 1418 unsigned long flags;
1435 1419
1436 WARN_ON_ONCE(rdp->beenonline == 0); 1420 WARN_ON_ONCE(rdp->beenonline == 0);
1437 1421
1438 /* 1422 /*
1439 * If an RCU GP has gone long enough, go check for dyntick 1423 * If an RCU GP has gone long enough, go check for dyntick
1440 * idle CPUs and, if needed, send resched IPIs. 1424 * idle CPUs and, if needed, send resched IPIs.
1441 */ 1425 */
1442 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1426 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1443 force_quiescent_state(rsp, 1); 1427 force_quiescent_state(rsp, 1);
1444 1428
1445 /* 1429 /*
1446 * Advance callbacks in response to end of earlier grace 1430 * Advance callbacks in response to end of earlier grace
1447 * period that some other CPU ended. 1431 * period that some other CPU ended.
1448 */ 1432 */
1449 rcu_process_gp_end(rsp, rdp); 1433 rcu_process_gp_end(rsp, rdp);
1450 1434
1451 /* Update RCU state based on any recent quiescent states. */ 1435 /* Update RCU state based on any recent quiescent states. */
1452 rcu_check_quiescent_state(rsp, rdp); 1436 rcu_check_quiescent_state(rsp, rdp);
1453 1437
1454 /* Does this CPU require a not-yet-started grace period? */ 1438 /* Does this CPU require a not-yet-started grace period? */
1455 if (cpu_needs_another_gp(rsp, rdp)) { 1439 if (cpu_needs_another_gp(rsp, rdp)) {
1456 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 1440 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
1457 rcu_start_gp(rsp, flags); /* releases above lock */ 1441 rcu_start_gp(rsp, flags); /* releases above lock */
1458 } 1442 }
1459 1443
1460 /* If there are callbacks ready, invoke them. */ 1444 /* If there are callbacks ready, invoke them. */
1461 rcu_do_batch(rsp, rdp); 1445 rcu_do_batch(rsp, rdp);
1462 } 1446 }
1463 1447
1464 /* 1448 /*
1465 * Do softirq processing for the current CPU. 1449 * Do softirq processing for the current CPU.
1466 */ 1450 */
1467 static void rcu_process_callbacks(void) 1451 static void rcu_process_callbacks(void)
1468 { 1452 {
1469 __rcu_process_callbacks(&rcu_sched_state, 1453 __rcu_process_callbacks(&rcu_sched_state,
1470 &__get_cpu_var(rcu_sched_data)); 1454 &__get_cpu_var(rcu_sched_data));
1471 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1455 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1472 rcu_preempt_process_callbacks(); 1456 rcu_preempt_process_callbacks();
1473 1457
1474 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ 1458 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1475 rcu_needs_cpu_flush(); 1459 rcu_needs_cpu_flush();
1476 } 1460 }
1477 1461
1478 /* 1462 /*
1479 * Wake up the current CPU's kthread. This replaces raise_softirq() 1463 * Wake up the current CPU's kthread. This replaces raise_softirq()
1480 * in earlier versions of RCU. Note that because we are running on 1464 * in earlier versions of RCU. Note that because we are running on
1481 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task 1465 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task
1482 * cannot disappear out from under us. 1466 * cannot disappear out from under us.
1483 */ 1467 */
1484 static void invoke_rcu_cpu_kthread(void) 1468 static void invoke_rcu_cpu_kthread(void)
1485 { 1469 {
1486 unsigned long flags; 1470 unsigned long flags;
1487 1471
1488 local_irq_save(flags); 1472 local_irq_save(flags);
1489 __this_cpu_write(rcu_cpu_has_work, 1); 1473 __this_cpu_write(rcu_cpu_has_work, 1);
1490 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { 1474 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) {
1491 local_irq_restore(flags); 1475 local_irq_restore(flags);
1492 return; 1476 return;
1493 } 1477 }
1494 wake_up(&__get_cpu_var(rcu_cpu_wq)); 1478 wake_up(&__get_cpu_var(rcu_cpu_wq));
1495 local_irq_restore(flags); 1479 local_irq_restore(flags);
1496 } 1480 }
1497 1481
1498 /* 1482 /*
1499 * Wake up the specified per-rcu_node-structure kthread. 1483 * Wake up the specified per-rcu_node-structure kthread.
1500 * The caller must hold ->lock. 1484 * Because the per-rcu_node kthreads are immortal, we don't need
1485 * to do anything to keep them alive.
1501 */ 1486 */
1502 static void invoke_rcu_node_kthread(struct rcu_node *rnp) 1487 static void invoke_rcu_node_kthread(struct rcu_node *rnp)
1503 { 1488 {
1504 struct task_struct *t; 1489 struct task_struct *t;
1505 1490
1506 t = rnp->node_kthread_task; 1491 t = rnp->node_kthread_task;
1507 if (t != NULL) 1492 if (t != NULL)
1508 wake_up_process(t); 1493 wake_up_process(t);
1509 } 1494 }
1510 1495
1511 /* 1496 /*
1512 * Set the specified CPU's kthread to run RT or not, as specified by 1497 * Set the specified CPU's kthread to run RT or not, as specified by
1513 * the to_rt argument. The CPU-hotplug locks are held, so the task 1498 * the to_rt argument. The CPU-hotplug locks are held, so the task
1514 * is not going away. 1499 * is not going away.
1515 */ 1500 */
1516 static void rcu_cpu_kthread_setrt(int cpu, int to_rt) 1501 static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1517 { 1502 {
1518 int policy; 1503 int policy;
1519 struct sched_param sp; 1504 struct sched_param sp;
1520 struct task_struct *t; 1505 struct task_struct *t;
1521 1506
1522 t = per_cpu(rcu_cpu_kthread_task, cpu); 1507 t = per_cpu(rcu_cpu_kthread_task, cpu);
1523 if (t == NULL) 1508 if (t == NULL)
1524 return; 1509 return;
1525 if (to_rt) { 1510 if (to_rt) {
1526 policy = SCHED_FIFO; 1511 policy = SCHED_FIFO;
1527 sp.sched_priority = RCU_KTHREAD_PRIO; 1512 sp.sched_priority = RCU_KTHREAD_PRIO;
1528 } else { 1513 } else {
1529 policy = SCHED_NORMAL; 1514 policy = SCHED_NORMAL;
1530 sp.sched_priority = 0; 1515 sp.sched_priority = 0;
1531 } 1516 }
1532 sched_setscheduler_nocheck(t, policy, &sp); 1517 sched_setscheduler_nocheck(t, policy, &sp);
1533 } 1518 }
1534 1519
1535 /* 1520 /*
1536 * Timer handler to initiate the waking up of per-CPU kthreads that 1521 * Timer handler to initiate the waking up of per-CPU kthreads that
1537 * have yielded the CPU due to excess numbers of RCU callbacks. 1522 * have yielded the CPU due to excess numbers of RCU callbacks.
1538 * We wake up the per-rcu_node kthread, which in turn will wake up 1523 * We wake up the per-rcu_node kthread, which in turn will wake up
1539 * the booster kthread. 1524 * the booster kthread.
1540 */ 1525 */
1541 static void rcu_cpu_kthread_timer(unsigned long arg) 1526 static void rcu_cpu_kthread_timer(unsigned long arg)
1542 { 1527 {
1543 unsigned long flags; 1528 unsigned long flags;
1544 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); 1529 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1545 struct rcu_node *rnp = rdp->mynode; 1530 struct rcu_node *rnp = rdp->mynode;
1546 1531
1547 raw_spin_lock_irqsave(&rnp->lock, flags); 1532 raw_spin_lock_irqsave(&rnp->lock, flags);
1548 rnp->wakemask |= rdp->grpmask; 1533 rnp->wakemask |= rdp->grpmask;
1549 invoke_rcu_node_kthread(rnp);
1550 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1534 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1535 invoke_rcu_node_kthread(rnp);
1551 } 1536 }
1552 1537
1553 /* 1538 /*
1554 * Drop to non-real-time priority and yield, but only after posting a 1539 * Drop to non-real-time priority and yield, but only after posting a
1555 * timer that will cause us to regain our real-time priority if we 1540 * timer that will cause us to regain our real-time priority if we
1556 * remain preempted. Either way, we restore our real-time priority 1541 * remain preempted. Either way, we restore our real-time priority
1557 * before returning. 1542 * before returning.
1558 */ 1543 */
1559 static void rcu_yield(void (*f)(unsigned long), unsigned long arg) 1544 static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1560 { 1545 {
1561 struct sched_param sp; 1546 struct sched_param sp;
1562 struct timer_list yield_timer; 1547 struct timer_list yield_timer;
1563 1548
1564 setup_timer_on_stack(&yield_timer, f, arg); 1549 setup_timer_on_stack(&yield_timer, f, arg);
1565 mod_timer(&yield_timer, jiffies + 2); 1550 mod_timer(&yield_timer, jiffies + 2);
1566 sp.sched_priority = 0; 1551 sp.sched_priority = 0;
1567 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); 1552 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1568 set_user_nice(current, 19); 1553 set_user_nice(current, 19);
1569 schedule(); 1554 schedule();
1570 sp.sched_priority = RCU_KTHREAD_PRIO; 1555 sp.sched_priority = RCU_KTHREAD_PRIO;
1571 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1556 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1572 del_timer(&yield_timer); 1557 del_timer(&yield_timer);
1573 } 1558 }
1574 1559
1575 /* 1560 /*
1576 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. 1561 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
1577 * This can happen while the corresponding CPU is either coming online 1562 * This can happen while the corresponding CPU is either coming online
1578 * or going offline. We cannot wait until the CPU is fully online 1563 * or going offline. We cannot wait until the CPU is fully online
1579 * before starting the kthread, because the various notifier functions 1564 * before starting the kthread, because the various notifier functions
1580 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until 1565 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
1581 * the corresponding CPU is online. 1566 * the corresponding CPU is online.
1582 * 1567 *
1583 * Return 1 if the kthread needs to stop, 0 otherwise. 1568 * Return 1 if the kthread needs to stop, 0 otherwise.
1584 * 1569 *
1585 * Caller must disable bh. This function can momentarily enable it. 1570 * Caller must disable bh. This function can momentarily enable it.
1586 */ 1571 */
1587 static int rcu_cpu_kthread_should_stop(int cpu) 1572 static int rcu_cpu_kthread_should_stop(int cpu)
1588 { 1573 {
1589 while (cpu_is_offline(cpu) || 1574 while (cpu_is_offline(cpu) ||
1590 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) || 1575 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
1591 smp_processor_id() != cpu) { 1576 smp_processor_id() != cpu) {
1592 if (kthread_should_stop()) 1577 if (kthread_should_stop())
1593 return 1; 1578 return 1;
1594 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1579 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1595 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); 1580 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
1596 local_bh_enable(); 1581 local_bh_enable();
1597 schedule_timeout_uninterruptible(1); 1582 schedule_timeout_uninterruptible(1);
1598 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu))) 1583 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
1599 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 1584 set_cpus_allowed_ptr(current, cpumask_of(cpu));
1600 local_bh_disable(); 1585 local_bh_disable();
1601 } 1586 }
1602 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1587 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1603 return 0; 1588 return 0;
1604 } 1589 }
1605 1590
1606 /* 1591 /*
1607 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1592 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1608 * earlier RCU softirq. 1593 * earlier RCU softirq.
1609 */ 1594 */
1610 static int rcu_cpu_kthread(void *arg) 1595 static int rcu_cpu_kthread(void *arg)
1611 { 1596 {
1612 int cpu = (int)(long)arg; 1597 int cpu = (int)(long)arg;
1613 unsigned long flags; 1598 unsigned long flags;
1614 int spincnt = 0; 1599 int spincnt = 0;
1615 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); 1600 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1616 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); 1601 wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu);
1617 char work; 1602 char work;
1618 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1603 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1619 1604
1620 for (;;) { 1605 for (;;) {
1621 *statusp = RCU_KTHREAD_WAITING; 1606 *statusp = RCU_KTHREAD_WAITING;
1622 wait_event_interruptible(*wqp, 1607 wait_event_interruptible(*wqp,
1623 *workp != 0 || kthread_should_stop()); 1608 *workp != 0 || kthread_should_stop());
1624 local_bh_disable(); 1609 local_bh_disable();
1625 if (rcu_cpu_kthread_should_stop(cpu)) { 1610 if (rcu_cpu_kthread_should_stop(cpu)) {
1626 local_bh_enable(); 1611 local_bh_enable();
1627 break; 1612 break;
1628 } 1613 }
1629 *statusp = RCU_KTHREAD_RUNNING; 1614 *statusp = RCU_KTHREAD_RUNNING;
1630 per_cpu(rcu_cpu_kthread_loops, cpu)++; 1615 per_cpu(rcu_cpu_kthread_loops, cpu)++;
1631 local_irq_save(flags); 1616 local_irq_save(flags);
1632 work = *workp; 1617 work = *workp;
1633 *workp = 0; 1618 *workp = 0;
1634 local_irq_restore(flags); 1619 local_irq_restore(flags);
1635 if (work) 1620 if (work)
1636 rcu_process_callbacks(); 1621 rcu_process_callbacks();
1637 local_bh_enable(); 1622 local_bh_enable();
1638 if (*workp != 0) 1623 if (*workp != 0)
1639 spincnt++; 1624 spincnt++;
1640 else 1625 else
1641 spincnt = 0; 1626 spincnt = 0;
1642 if (spincnt > 10) { 1627 if (spincnt > 10) {
1643 *statusp = RCU_KTHREAD_YIELDING; 1628 *statusp = RCU_KTHREAD_YIELDING;
1644 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); 1629 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1645 spincnt = 0; 1630 spincnt = 0;
1646 } 1631 }
1647 } 1632 }
1648 *statusp = RCU_KTHREAD_STOPPED; 1633 *statusp = RCU_KTHREAD_STOPPED;
1649 return 0; 1634 return 0;
1650 } 1635 }
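
The spincnt logic above is a simple back-off: if callbacks are still pending after more than ten consecutive loops, the kthread temporarily yields (via rcu_yield()) so that it cannot starve other tasks. The bare pattern, with the kernel details abstracted into hypothetical helpers:

    /* Sketch of the count-then-yield pattern; have_work(), do_work() and
     * yield_briefly() stand in for the kernel-side helpers. */
    static void toy_work_loop(int (*have_work)(void),
                              void (*do_work)(void),
                              void (*yield_briefly)(void))
    {
            int spincnt = 0;

            for (;;) {
                    do_work();
                    spincnt = have_work() ? spincnt + 1 : 0;
                    if (spincnt > 10) {
                            yield_briefly();    /* let other tasks run */
                            spincnt = 0;
                    }
            }
    }
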
1651 1636
1652 /* 1637 /*
1653 * Spawn a per-CPU kthread, setting up affinity and priority. 1638 * Spawn a per-CPU kthread, setting up affinity and priority.
1654 * Because the CPU hotplug lock is held, no other CPU will be attempting 1639 * Because the CPU hotplug lock is held, no other CPU will be attempting
1655 * to manipulate rcu_cpu_kthread_task. There might be another CPU 1640 * to manipulate rcu_cpu_kthread_task. There might be another CPU
1656 * attempting to access it during boot, but the locking in kthread_bind() 1641 * attempting to access it during boot, but the locking in kthread_bind()
1657 * will enforce sufficient ordering. 1642 * will enforce sufficient ordering.
1658 */ 1643 */
1659 static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) 1644 static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1660 { 1645 {
1661 struct sched_param sp; 1646 struct sched_param sp;
1662 struct task_struct *t; 1647 struct task_struct *t;
1663 1648
1664 if (!rcu_kthreads_spawnable || 1649 if (!rcu_kthreads_spawnable ||
1665 per_cpu(rcu_cpu_kthread_task, cpu) != NULL) 1650 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1666 return 0; 1651 return 0;
1667 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); 1652 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu);
1668 if (IS_ERR(t)) 1653 if (IS_ERR(t))
1669 return PTR_ERR(t); 1654 return PTR_ERR(t);
1670 kthread_bind(t, cpu); 1655 kthread_bind(t, cpu);
1671 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1656 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1672 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); 1657 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1673 per_cpu(rcu_cpu_kthread_task, cpu) = t; 1658 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1674 wake_up_process(t); 1659 wake_up_process(t);
1675 sp.sched_priority = RCU_KTHREAD_PRIO; 1660 sp.sched_priority = RCU_KTHREAD_PRIO;
1676 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1661 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1677 return 0; 1662 return 0;
1678 } 1663 }
1679 1664
1680 /* 1665 /*
1681 * Per-rcu_node kthread, which is in charge of waking up the per-CPU 1666 * Per-rcu_node kthread, which is in charge of waking up the per-CPU
1682 * kthreads when needed. We ignore requests to wake up kthreads 1667 * kthreads when needed. We ignore requests to wake up kthreads
1683 * for offline CPUs, which is OK because force_quiescent_state() 1668 * for offline CPUs, which is OK because force_quiescent_state()
1684 * takes care of this case. 1669 * takes care of this case.
1685 */ 1670 */
1686 static int rcu_node_kthread(void *arg) 1671 static int rcu_node_kthread(void *arg)
1687 { 1672 {
1688 int cpu; 1673 int cpu;
1689 unsigned long flags; 1674 unsigned long flags;
1690 unsigned long mask; 1675 unsigned long mask;
1691 struct rcu_node *rnp = (struct rcu_node *)arg; 1676 struct rcu_node *rnp = (struct rcu_node *)arg;
1692 struct sched_param sp; 1677 struct sched_param sp;
1693 struct task_struct *t; 1678 struct task_struct *t;
1694 1679
1695 for (;;) { 1680 for (;;) {
1696 rnp->node_kthread_status = RCU_KTHREAD_WAITING; 1681 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1697 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0 || 1682 wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0);
1698 kthread_should_stop());
1699 if (kthread_should_stop())
1700 break;
1701 rnp->node_kthread_status = RCU_KTHREAD_RUNNING; 1683 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1702 raw_spin_lock_irqsave(&rnp->lock, flags); 1684 raw_spin_lock_irqsave(&rnp->lock, flags);
1703 mask = rnp->wakemask; 1685 mask = rnp->wakemask;
1704 rnp->wakemask = 0; 1686 rnp->wakemask = 0;
1705 rcu_initiate_boost(rnp); 1687 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1706 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1707 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { 1688 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1708 if ((mask & 0x1) == 0) 1689 if ((mask & 0x1) == 0)
1709 continue; 1690 continue;
1710 preempt_disable(); 1691 preempt_disable();
1711 t = per_cpu(rcu_cpu_kthread_task, cpu); 1692 t = per_cpu(rcu_cpu_kthread_task, cpu);
1712 if (!cpu_online(cpu) || t == NULL) { 1693 if (!cpu_online(cpu) || t == NULL) {
1713 preempt_enable(); 1694 preempt_enable();
1714 continue; 1695 continue;
1715 } 1696 }
1716 per_cpu(rcu_cpu_has_work, cpu) = 1; 1697 per_cpu(rcu_cpu_has_work, cpu) = 1;
1717 sp.sched_priority = RCU_KTHREAD_PRIO; 1698 sp.sched_priority = RCU_KTHREAD_PRIO;
1718 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1699 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1719 preempt_enable(); 1700 preempt_enable();
1720 } 1701 }
1721 } 1702 }
1703 /* NOTREACHED */
1722 rnp->node_kthread_status = RCU_KTHREAD_STOPPED; 1704 rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
1723 return 0; 1705 return 0;
1724 } 1706 }
1725 1707
1726 /* 1708 /*
1727 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1709 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1728 * served by the rcu_node in question. The CPU hotplug lock is still 1710 * served by the rcu_node in question. The CPU hotplug lock is still
1729 * held, so the value of rnp->qsmaskinit will be stable. 1711 * held, so the value of rnp->qsmaskinit will be stable.
1730 * 1712 *
1731 * We don't include outgoingcpu in the affinity set; use -1 if there is 1713 * We don't include outgoingcpu in the affinity set; use -1 if there is
1732 * no outgoing CPU. If there are no CPUs left in the affinity set, 1714 * no outgoing CPU. If there are no CPUs left in the affinity set,
1733 * this function allows the kthread to execute on any CPU. 1715 * this function allows the kthread to execute on any CPU.
1734 */ 1716 */
1735 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1717 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1736 { 1718 {
1737 cpumask_var_t cm; 1719 cpumask_var_t cm;
1738 int cpu; 1720 int cpu;
1739 unsigned long mask = rnp->qsmaskinit; 1721 unsigned long mask = rnp->qsmaskinit;
1740 1722
1741 if (rnp->node_kthread_task == NULL || mask == 0) 1723 if (rnp->node_kthread_task == NULL)
1742 return; 1724 return;
1743 if (!alloc_cpumask_var(&cm, GFP_KERNEL)) 1725 if (!alloc_cpumask_var(&cm, GFP_KERNEL))
1744 return; 1726 return;
1745 cpumask_clear(cm); 1727 cpumask_clear(cm);
1746 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1728 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1747 if ((mask & 0x1) && cpu != outgoingcpu) 1729 if ((mask & 0x1) && cpu != outgoingcpu)
1748 cpumask_set_cpu(cpu, cm); 1730 cpumask_set_cpu(cpu, cm);
1749 if (cpumask_weight(cm) == 0) { 1731 if (cpumask_weight(cm) == 0) {
1750 cpumask_setall(cm); 1732 cpumask_setall(cm);
1751 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) 1733 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1752 cpumask_clear_cpu(cpu, cm); 1734 cpumask_clear_cpu(cpu, cm);
1753 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1735 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1754 } 1736 }
1755 set_cpus_allowed_ptr(rnp->node_kthread_task, cm); 1737 set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
1756 rcu_boost_kthread_setaffinity(rnp, cm); 1738 rcu_boost_kthread_setaffinity(rnp, cm);
1757 free_cpumask_var(cm); 1739 free_cpumask_var(cm);
1758 } 1740 }
1759 1741
1760 /* 1742 /*
1761 * Spawn a per-rcu_node kthread, setting priority and affinity. 1743 * Spawn a per-rcu_node kthread, setting priority and affinity.
1762 * Called during boot before online/offline can happen, or, if 1744 * Called during boot before online/offline can happen, or, if
1763 * during runtime, with the main CPU-hotplug locks held. So only 1745 * during runtime, with the main CPU-hotplug locks held. So only
1764 * one of these can be executing at a time. 1746 * one of these can be executing at a time.
1765 */ 1747 */
1766 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, 1748 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1767 struct rcu_node *rnp) 1749 struct rcu_node *rnp)
1768 { 1750 {
1769 unsigned long flags; 1751 unsigned long flags;
1770 int rnp_index = rnp - &rsp->node[0]; 1752 int rnp_index = rnp - &rsp->node[0];
1771 struct sched_param sp; 1753 struct sched_param sp;
1772 struct task_struct *t; 1754 struct task_struct *t;
1773 1755
1774 if (!rcu_kthreads_spawnable || 1756 if (!rcu_kthreads_spawnable ||
1775 rnp->qsmaskinit == 0) 1757 rnp->qsmaskinit == 0)
1776 return 0; 1758 return 0;
1777 if (rnp->node_kthread_task == NULL) { 1759 if (rnp->node_kthread_task == NULL) {
1778 t = kthread_create(rcu_node_kthread, (void *)rnp, 1760 t = kthread_create(rcu_node_kthread, (void *)rnp,
1779 "rcun%d", rnp_index); 1761 "rcun%d", rnp_index);
1780 if (IS_ERR(t)) 1762 if (IS_ERR(t))
1781 return PTR_ERR(t); 1763 return PTR_ERR(t);
1782 raw_spin_lock_irqsave(&rnp->lock, flags); 1764 raw_spin_lock_irqsave(&rnp->lock, flags);
1783 rnp->node_kthread_task = t; 1765 rnp->node_kthread_task = t;
1784 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1766 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1785 wake_up_process(t); 1767 wake_up_process(t);
1786 sp.sched_priority = 99; 1768 sp.sched_priority = 99;
1787 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1769 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1788 } 1770 }
1789 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); 1771 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
1790 } 1772 }
1791 1773
1792 /* 1774 /*
1793 * Spawn all kthreads -- called as soon as the scheduler is running. 1775 * Spawn all kthreads -- called as soon as the scheduler is running.
1794 */ 1776 */
1795 static int __init rcu_spawn_kthreads(void) 1777 static int __init rcu_spawn_kthreads(void)
1796 { 1778 {
1797 int cpu; 1779 int cpu;
1798 struct rcu_node *rnp; 1780 struct rcu_node *rnp;
1799 1781
1800 rcu_kthreads_spawnable = 1; 1782 rcu_kthreads_spawnable = 1;
1801 for_each_possible_cpu(cpu) { 1783 for_each_possible_cpu(cpu) {
1802 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); 1784 init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu));
1803 per_cpu(rcu_cpu_has_work, cpu) = 0; 1785 per_cpu(rcu_cpu_has_work, cpu) = 0;
1804 if (cpu_online(cpu)) 1786 if (cpu_online(cpu))
1805 (void)rcu_spawn_one_cpu_kthread(cpu); 1787 (void)rcu_spawn_one_cpu_kthread(cpu);
1806 } 1788 }
1807 rnp = rcu_get_root(rcu_state); 1789 rnp = rcu_get_root(rcu_state);
1808 init_waitqueue_head(&rnp->node_wq); 1790 init_waitqueue_head(&rnp->node_wq);
1809 rcu_init_boost_waitqueue(rnp); 1791 rcu_init_boost_waitqueue(rnp);
1810 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1792 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1811 if (NUM_RCU_NODES > 1) 1793 if (NUM_RCU_NODES > 1)
1812 rcu_for_each_leaf_node(rcu_state, rnp) { 1794 rcu_for_each_leaf_node(rcu_state, rnp) {
1813 init_waitqueue_head(&rnp->node_wq); 1795 init_waitqueue_head(&rnp->node_wq);
1814 rcu_init_boost_waitqueue(rnp); 1796 rcu_init_boost_waitqueue(rnp);
1815 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1797 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1816 } 1798 }
1817 return 0; 1799 return 0;
1818 } 1800 }
1819 early_initcall(rcu_spawn_kthreads); 1801 early_initcall(rcu_spawn_kthreads);
1820 1802
1821 static void 1803 static void
1822 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1804 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1823 struct rcu_state *rsp) 1805 struct rcu_state *rsp)
1824 { 1806 {
1825 unsigned long flags; 1807 unsigned long flags;
1826 struct rcu_data *rdp; 1808 struct rcu_data *rdp;
1827 1809
1828 debug_rcu_head_queue(head); 1810 debug_rcu_head_queue(head);
1829 head->func = func; 1811 head->func = func;
1830 head->next = NULL; 1812 head->next = NULL;
1831 1813
1832 smp_mb(); /* Ensure RCU update seen before callback registry. */ 1814 smp_mb(); /* Ensure RCU update seen before callback registry. */
1833 1815
1834 /* 1816 /*
1835 * Opportunistically note grace-period endings and beginnings. 1817 * Opportunistically note grace-period endings and beginnings.
1836 * Note that we might see a beginning right after we see an 1818 * Note that we might see a beginning right after we see an
1837 * end, but never vice versa, since this CPU has to pass through 1819 * end, but never vice versa, since this CPU has to pass through
1838 * a quiescent state betweentimes. 1820 * a quiescent state betweentimes.
1839 */ 1821 */
1840 local_irq_save(flags); 1822 local_irq_save(flags);
1841 rdp = this_cpu_ptr(rsp->rda); 1823 rdp = this_cpu_ptr(rsp->rda);
1842 1824
1843 /* Add the callback to our list. */ 1825 /* Add the callback to our list. */
1844 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1826 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1845 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1827 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1846 rdp->qlen++; 1828 rdp->qlen++;
1847 1829
1848 /* If interrupts were disabled, don't dive into RCU core. */ 1830 /* If interrupts were disabled, don't dive into RCU core. */
1849 if (irqs_disabled_flags(flags)) { 1831 if (irqs_disabled_flags(flags)) {
1850 local_irq_restore(flags); 1832 local_irq_restore(flags);
1851 return; 1833 return;
1852 } 1834 }
1853 1835
1854 /* 1836 /*
1855 * Force the grace period if too many callbacks or too long waiting. 1837 * Force the grace period if too many callbacks or too long waiting.
1856 * Enforce hysteresis, and don't invoke force_quiescent_state() 1838 * Enforce hysteresis, and don't invoke force_quiescent_state()
1857 * if some other CPU has recently done so. Also, don't bother 1839 * if some other CPU has recently done so. Also, don't bother
1858 * invoking force_quiescent_state() if the newly enqueued callback 1840 * invoking force_quiescent_state() if the newly enqueued callback
1859 * is the only one waiting for a grace period to complete. 1841 * is the only one waiting for a grace period to complete.
1860 */ 1842 */
1861 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 1843 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1862 1844
1863 /* Are we ignoring a completed grace period? */ 1845 /* Are we ignoring a completed grace period? */
1864 rcu_process_gp_end(rsp, rdp); 1846 rcu_process_gp_end(rsp, rdp);
1865 check_for_new_grace_period(rsp, rdp); 1847 check_for_new_grace_period(rsp, rdp);
1866 1848
1867 /* Start a new grace period if one not already started. */ 1849 /* Start a new grace period if one not already started. */
1868 if (!rcu_gp_in_progress(rsp)) { 1850 if (!rcu_gp_in_progress(rsp)) {
1869 unsigned long nestflag; 1851 unsigned long nestflag;
1870 struct rcu_node *rnp_root = rcu_get_root(rsp); 1852 struct rcu_node *rnp_root = rcu_get_root(rsp);
1871 1853
1872 raw_spin_lock_irqsave(&rnp_root->lock, nestflag); 1854 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1873 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */ 1855 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
1874 } else { 1856 } else {
1875 /* Give the grace period a kick. */ 1857 /* Give the grace period a kick. */
1876 rdp->blimit = LONG_MAX; 1858 rdp->blimit = LONG_MAX;
1877 if (rsp->n_force_qs == rdp->n_force_qs_snap && 1859 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1878 *rdp->nxttail[RCU_DONE_TAIL] != head) 1860 *rdp->nxttail[RCU_DONE_TAIL] != head)
1879 force_quiescent_state(rsp, 0); 1861 force_quiescent_state(rsp, 0);
1880 rdp->n_force_qs_snap = rsp->n_force_qs; 1862 rdp->n_force_qs_snap = rsp->n_force_qs;
1881 rdp->qlen_last_fqs_check = rdp->qlen; 1863 rdp->qlen_last_fqs_check = rdp->qlen;
1882 } 1864 }
1883 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1865 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1884 force_quiescent_state(rsp, 1); 1866 force_quiescent_state(rsp, 1);
1885 local_irq_restore(flags); 1867 local_irq_restore(flags);
1886 } 1868 }
1887 1869
1888 /* 1870 /*
1889 * Queue an RCU-sched callback for invocation after a grace period. 1871 * Queue an RCU-sched callback for invocation after a grace period.
1890 */ 1872 */
1891 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1873 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1892 { 1874 {
1893 __call_rcu(head, func, &rcu_sched_state); 1875 __call_rcu(head, func, &rcu_sched_state);
1894 } 1876 }
1895 EXPORT_SYMBOL_GPL(call_rcu_sched); 1877 EXPORT_SYMBOL_GPL(call_rcu_sched);
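[Editor's note] For reference, a minimal, hypothetical sketch of how a caller typically uses call_rcu_sched(): the callback receives the embedded rcu_head and recovers the enclosing object with container_of() before freeing it. The struct foo type and the foo_* helpers are illustrative only and are not part of this patch.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;		/* embedded head handed to call_rcu_sched() */
};

/* Runs after a full rcu-sched grace period has elapsed. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

/* Updater: unpublish fp elsewhere first, then defer the actual free. */
static void foo_retire(struct foo *fp)
{
	call_rcu_sched(&fp->rcu, foo_free_rcu);
}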
1896 1878
1897 /* 1879 /*
1898 * Queue an RCU callback for invocation after a quicker grace period. 1880 * Queue an RCU callback for invocation after a quicker grace period.
1899 */ 1881 */
1900 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1882 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1901 { 1883 {
1902 __call_rcu(head, func, &rcu_bh_state); 1884 __call_rcu(head, func, &rcu_bh_state);
1903 } 1885 }
1904 EXPORT_SYMBOL_GPL(call_rcu_bh); 1886 EXPORT_SYMBOL_GPL(call_rcu_bh);
1905 1887
1906 /** 1888 /**
1907 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 1889 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1908 * 1890 *
1909 * Control will return to the caller some time after a full rcu-sched 1891 * Control will return to the caller some time after a full rcu-sched
1910 * grace period has elapsed, in other words after all currently executing 1892 * grace period has elapsed, in other words after all currently executing
1911 * rcu-sched read-side critical sections have completed. These read-side 1893 * rcu-sched read-side critical sections have completed. These read-side
1912 * critical sections are delimited by rcu_read_lock_sched() and 1894 * critical sections are delimited by rcu_read_lock_sched() and
1913 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 1895 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
1914 * local_irq_disable(), and so on may be used in place of 1896 * local_irq_disable(), and so on may be used in place of
1915 * rcu_read_lock_sched(). 1897 * rcu_read_lock_sched().
1916 * 1898 *
1917 * This means that all preempt_disable code sequences, including NMI and 1899 * This means that all preempt_disable code sequences, including NMI and
1918 * hardware-interrupt handlers, in progress on entry will have completed 1900 * hardware-interrupt handlers, in progress on entry will have completed
1919 * before this primitive returns. However, this does not guarantee that 1901 * before this primitive returns. However, this does not guarantee that
1920 * softirq handlers will have completed, since in some kernels, these 1902 * softirq handlers will have completed, since in some kernels, these
1921 * handlers can run in process context, and can block. 1903 * handlers can run in process context, and can block.
1922 * 1904 *
1923 * This primitive provides the guarantees made by the (now removed) 1905 * This primitive provides the guarantees made by the (now removed)
1924 * synchronize_kernel() API. In contrast, synchronize_rcu() only 1906 * synchronize_kernel() API. In contrast, synchronize_rcu() only
1925 * guarantees that rcu_read_lock() sections will have completed. 1907 * guarantees that rcu_read_lock() sections will have completed.
1926 * In "classic RCU", these two guarantees happen to be one and 1908 * In "classic RCU", these two guarantees happen to be one and
1927 * the same, but can differ in realtime RCU implementations. 1909 * the same, but can differ in realtime RCU implementations.
1928 */ 1910 */
1929 void synchronize_sched(void) 1911 void synchronize_sched(void)
1930 { 1912 {
1931 struct rcu_synchronize rcu; 1913 struct rcu_synchronize rcu;
1932 1914
1933 if (rcu_blocking_is_gp()) 1915 if (rcu_blocking_is_gp())
1934 return; 1916 return;
1935 1917
1936 init_rcu_head_on_stack(&rcu.head); 1918 init_rcu_head_on_stack(&rcu.head);
1937 init_completion(&rcu.completion); 1919 init_completion(&rcu.completion);
1938 /* Will wake me after RCU finished. */ 1920 /* Will wake me after RCU finished. */
1939 call_rcu_sched(&rcu.head, wakeme_after_rcu); 1921 call_rcu_sched(&rcu.head, wakeme_after_rcu);
1940 /* Wait for it. */ 1922 /* Wait for it. */
1941 wait_for_completion(&rcu.completion); 1923 wait_for_completion(&rcu.completion);
1942 destroy_rcu_head_on_stack(&rcu.head); 1924 destroy_rcu_head_on_stack(&rcu.head);
1943 } 1925 }
1944 EXPORT_SYMBOL_GPL(synchronize_sched); 1926 EXPORT_SYMBOL_GPL(synchronize_sched);
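[Editor's note] A further hedged sketch of the update-side pattern that synchronize_sched() enables, reusing the hypothetical struct foo from the earlier note: readers run under rcu_read_lock_sched(), the updater swaps the published pointer, waits for a full rcu-sched grace period, and only then frees the old version. The global_foo pointer and foo_* names are assumptions for illustration.

static struct foo __rcu *global_foo;		/* hypothetical published pointer */

static int foo_reader(void)
{
	struct foo *fp;
	int val = -1;

	rcu_read_lock_sched();			/* disables preemption */
	fp = rcu_dereference_sched(global_foo);
	if (fp)
		val = fp->data;
	rcu_read_unlock_sched();
	return val;
}

static void foo_replace(struct foo *newfp)
{
	struct foo *oldfp;

	oldfp = rcu_dereference_protected(global_foo, 1);	/* assumes caller serializes updates */
	rcu_assign_pointer(global_foo, newfp);
	synchronize_sched();			/* all pre-existing sched readers have finished */
	kfree(oldfp);
}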
1945 1927
1946 /** 1928 /**
1947 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. 1929 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
1948 * 1930 *
1949 * Control will return to the caller some time after a full rcu_bh grace 1931 * Control will return to the caller some time after a full rcu_bh grace
1950 * period has elapsed, in other words after all currently executing rcu_bh 1932 * period has elapsed, in other words after all currently executing rcu_bh
1951 * read-side critical sections have completed. RCU read-side critical 1933 * read-side critical sections have completed. RCU read-side critical
1952 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), 1934 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
1953 * and may be nested. 1935 * and may be nested.
1954 */ 1936 */
1955 void synchronize_rcu_bh(void) 1937 void synchronize_rcu_bh(void)
1956 { 1938 {
1957 struct rcu_synchronize rcu; 1939 struct rcu_synchronize rcu;
1958 1940
1959 if (rcu_blocking_is_gp()) 1941 if (rcu_blocking_is_gp())
1960 return; 1942 return;
1961 1943
1962 init_rcu_head_on_stack(&rcu.head); 1944 init_rcu_head_on_stack(&rcu.head);
1963 init_completion(&rcu.completion); 1945 init_completion(&rcu.completion);
1964 /* Will wake me after RCU finished. */ 1946 /* Will wake me after RCU finished. */
1965 call_rcu_bh(&rcu.head, wakeme_after_rcu); 1947 call_rcu_bh(&rcu.head, wakeme_after_rcu);
1966 /* Wait for it. */ 1948 /* Wait for it. */
1967 wait_for_completion(&rcu.completion); 1949 wait_for_completion(&rcu.completion);
1968 destroy_rcu_head_on_stack(&rcu.head); 1950 destroy_rcu_head_on_stack(&rcu.head);
1969 } 1951 }
1970 EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1952 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1971 1953
1972 /* 1954 /*
1973 * Check to see if there is any immediate RCU-related work to be done 1955 * Check to see if there is any immediate RCU-related work to be done
1974 * by the current CPU, for the specified type of RCU, returning 1 if so. 1956 * by the current CPU, for the specified type of RCU, returning 1 if so.
1975 * The checks are in order of increasing expense: checks that can be 1957 * The checks are in order of increasing expense: checks that can be
1976 * carried out against CPU-local state are performed first. However, 1958 * carried out against CPU-local state are performed first. However,
1977 * we must check for CPU stalls first, else we might not get a chance. 1959 * we must check for CPU stalls first, else we might not get a chance.
1978 */ 1960 */
1979 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 1961 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1980 { 1962 {
1981 struct rcu_node *rnp = rdp->mynode; 1963 struct rcu_node *rnp = rdp->mynode;
1982 1964
1983 rdp->n_rcu_pending++; 1965 rdp->n_rcu_pending++;
1984 1966
1985 /* Check for CPU stalls, if enabled. */ 1967 /* Check for CPU stalls, if enabled. */
1986 check_cpu_stall(rsp, rdp); 1968 check_cpu_stall(rsp, rdp);
1987 1969
1988 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1970 /* Is the RCU core waiting for a quiescent state from this CPU? */
1989 if (rdp->qs_pending && !rdp->passed_quiesc) { 1971 if (rdp->qs_pending && !rdp->passed_quiesc) {
1990 1972
1991 /* 1973 /*
1992 * If force_quiescent_state() coming soon and this CPU 1974 * If force_quiescent_state() coming soon and this CPU
1993 * needs a quiescent state, and this is either RCU-sched 1975 * needs a quiescent state, and this is either RCU-sched
1994 * or RCU-bh, force a local reschedule. 1976 * or RCU-bh, force a local reschedule.
1995 */ 1977 */
1996 rdp->n_rp_qs_pending++; 1978 rdp->n_rp_qs_pending++;
1997 if (!rdp->preemptible && 1979 if (!rdp->preemptible &&
1998 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1980 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1999 jiffies)) 1981 jiffies))
2000 set_need_resched(); 1982 set_need_resched();
2001 } else if (rdp->qs_pending && rdp->passed_quiesc) { 1983 } else if (rdp->qs_pending && rdp->passed_quiesc) {
2002 rdp->n_rp_report_qs++; 1984 rdp->n_rp_report_qs++;
2003 return 1; 1985 return 1;
2004 } 1986 }
2005 1987
2006 /* Does this CPU have callbacks ready to invoke? */ 1988 /* Does this CPU have callbacks ready to invoke? */
2007 if (cpu_has_callbacks_ready_to_invoke(rdp)) { 1989 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
2008 rdp->n_rp_cb_ready++; 1990 rdp->n_rp_cb_ready++;
2009 return 1; 1991 return 1;
2010 } 1992 }
2011 1993
2012 /* Has RCU gone idle with this CPU needing another grace period? */ 1994 /* Has RCU gone idle with this CPU needing another grace period? */
2013 if (cpu_needs_another_gp(rsp, rdp)) { 1995 if (cpu_needs_another_gp(rsp, rdp)) {
2014 rdp->n_rp_cpu_needs_gp++; 1996 rdp->n_rp_cpu_needs_gp++;
2015 return 1; 1997 return 1;
2016 } 1998 }
2017 1999
2018 /* Has another RCU grace period completed? */ 2000 /* Has another RCU grace period completed? */
2019 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 2001 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
2020 rdp->n_rp_gp_completed++; 2002 rdp->n_rp_gp_completed++;
2021 return 1; 2003 return 1;
2022 } 2004 }
2023 2005
2024 /* Has a new RCU grace period started? */ 2006 /* Has a new RCU grace period started? */
2025 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 2007 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
2026 rdp->n_rp_gp_started++; 2008 rdp->n_rp_gp_started++;
2027 return 1; 2009 return 1;
2028 } 2010 }
2029 2011
2030 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 2012 /* Has an RCU GP gone long enough to send resched IPIs &c? */
2031 if (rcu_gp_in_progress(rsp) && 2013 if (rcu_gp_in_progress(rsp) &&
2032 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { 2014 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
2033 rdp->n_rp_need_fqs++; 2015 rdp->n_rp_need_fqs++;
2034 return 1; 2016 return 1;
2035 } 2017 }
2036 2018
2037 /* nothing to do */ 2019 /* nothing to do */
2038 rdp->n_rp_need_nothing++; 2020 rdp->n_rp_need_nothing++;
2039 return 0; 2021 return 0;
2040 } 2022 }
2041 2023
2042 /* 2024 /*
2043 * Check to see if there is any immediate RCU-related work to be done 2025 * Check to see if there is any immediate RCU-related work to be done
2044 * by the current CPU, returning 1 if so. This function is part of the 2026 * by the current CPU, returning 1 if so. This function is part of the
2045 * RCU implementation; it is -not- an exported member of the RCU API. 2027 * RCU implementation; it is -not- an exported member of the RCU API.
2046 */ 2028 */
2047 static int rcu_pending(int cpu) 2029 static int rcu_pending(int cpu)
2048 { 2030 {
2049 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || 2031 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
2050 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || 2032 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
2051 rcu_preempt_pending(cpu); 2033 rcu_preempt_pending(cpu);
2052 } 2034 }
2053 2035
2054 /* 2036 /*
2055 * Check to see if any future RCU-related work will need to be done 2037 * Check to see if any future RCU-related work will need to be done
2056 * by the current CPU, even if none need be done immediately, returning 2038 * by the current CPU, even if none need be done immediately, returning
2057 * 1 if so. 2039 * 1 if so.
2058 */ 2040 */
2059 static int rcu_needs_cpu_quick_check(int cpu) 2041 static int rcu_needs_cpu_quick_check(int cpu)
2060 { 2042 {
2061 /* RCU callbacks either ready or pending? */ 2043 /* RCU callbacks either ready or pending? */
2062 return per_cpu(rcu_sched_data, cpu).nxtlist || 2044 return per_cpu(rcu_sched_data, cpu).nxtlist ||
2063 per_cpu(rcu_bh_data, cpu).nxtlist || 2045 per_cpu(rcu_bh_data, cpu).nxtlist ||
2064 rcu_preempt_needs_cpu(cpu); 2046 rcu_preempt_needs_cpu(cpu);
2065 } 2047 }
2066 2048
2067 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 2049 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
2068 static atomic_t rcu_barrier_cpu_count; 2050 static atomic_t rcu_barrier_cpu_count;
2069 static DEFINE_MUTEX(rcu_barrier_mutex); 2051 static DEFINE_MUTEX(rcu_barrier_mutex);
2070 static struct completion rcu_barrier_completion; 2052 static struct completion rcu_barrier_completion;
2071 2053
2072 static void rcu_barrier_callback(struct rcu_head *notused) 2054 static void rcu_barrier_callback(struct rcu_head *notused)
2073 { 2055 {
2074 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 2056 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
2075 complete(&rcu_barrier_completion); 2057 complete(&rcu_barrier_completion);
2076 } 2058 }
2077 2059
2078 /* 2060 /*
2079 * Called with preemption disabled, and from cross-cpu IRQ context. 2061 * Called with preemption disabled, and from cross-cpu IRQ context.
2080 */ 2062 */
2081 static void rcu_barrier_func(void *type) 2063 static void rcu_barrier_func(void *type)
2082 { 2064 {
2083 int cpu = smp_processor_id(); 2065 int cpu = smp_processor_id();
2084 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 2066 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
2085 void (*call_rcu_func)(struct rcu_head *head, 2067 void (*call_rcu_func)(struct rcu_head *head,
2086 void (*func)(struct rcu_head *head)); 2068 void (*func)(struct rcu_head *head));
2087 2069
2088 atomic_inc(&rcu_barrier_cpu_count); 2070 atomic_inc(&rcu_barrier_cpu_count);
2089 call_rcu_func = type; 2071 call_rcu_func = type;
2090 call_rcu_func(head, rcu_barrier_callback); 2072 call_rcu_func(head, rcu_barrier_callback);
2091 } 2073 }
2092 2074
2093 /* 2075 /*
2094 * Orchestrate the specified type of RCU barrier, waiting for all 2076 * Orchestrate the specified type of RCU barrier, waiting for all
2095 * RCU callbacks of the specified type to complete. 2077 * RCU callbacks of the specified type to complete.
2096 */ 2078 */
2097 static void _rcu_barrier(struct rcu_state *rsp, 2079 static void _rcu_barrier(struct rcu_state *rsp,
2098 void (*call_rcu_func)(struct rcu_head *head, 2080 void (*call_rcu_func)(struct rcu_head *head,
2099 void (*func)(struct rcu_head *head))) 2081 void (*func)(struct rcu_head *head)))
2100 { 2082 {
2101 BUG_ON(in_interrupt()); 2083 BUG_ON(in_interrupt());
2102 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 2084 /* Take mutex to serialize concurrent rcu_barrier() requests. */
2103 mutex_lock(&rcu_barrier_mutex); 2085 mutex_lock(&rcu_barrier_mutex);
2104 init_completion(&rcu_barrier_completion); 2086 init_completion(&rcu_barrier_completion);
2105 /* 2087 /*
2106 * Initialize rcu_barrier_cpu_count to 1, then invoke 2088 * Initialize rcu_barrier_cpu_count to 1, then invoke
2107 * rcu_barrier_func() on each CPU, so that each CPU also has 2089 * rcu_barrier_func() on each CPU, so that each CPU also has
2108 * incremented rcu_barrier_cpu_count. Only then is it safe to 2090 * incremented rcu_barrier_cpu_count. Only then is it safe to
2109 * decrement rcu_barrier_cpu_count -- otherwise the first CPU 2091 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
2110 * might complete its grace period before all of the other CPUs 2092 * might complete its grace period before all of the other CPUs
2111 * did their increment, causing this function to return too 2093 * did their increment, causing this function to return too
2112 * early. Note that on_each_cpu() disables irqs, which prevents 2094 * early. Note that on_each_cpu() disables irqs, which prevents
2113 * any CPUs from coming online or going offline until each online 2095 * any CPUs from coming online or going offline until each online
2114 * CPU has queued its RCU-barrier callback. 2096 * CPU has queued its RCU-barrier callback.
2115 */ 2097 */
2116 atomic_set(&rcu_barrier_cpu_count, 1); 2098 atomic_set(&rcu_barrier_cpu_count, 1);
2117 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); 2099 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
2118 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 2100 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
2119 complete(&rcu_barrier_completion); 2101 complete(&rcu_barrier_completion);
2120 wait_for_completion(&rcu_barrier_completion); 2102 wait_for_completion(&rcu_barrier_completion);
2121 mutex_unlock(&rcu_barrier_mutex); 2103 mutex_unlock(&rcu_barrier_mutex);
2122 } 2104 }
2123 2105
2124 /** 2106 /**
2125 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 2107 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
2126 */ 2108 */
2127 void rcu_barrier_bh(void) 2109 void rcu_barrier_bh(void)
2128 { 2110 {
2129 _rcu_barrier(&rcu_bh_state, call_rcu_bh); 2111 _rcu_barrier(&rcu_bh_state, call_rcu_bh);
2130 } 2112 }
2131 EXPORT_SYMBOL_GPL(rcu_barrier_bh); 2113 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
2132 2114
2133 /** 2115 /**
2134 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 2116 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
2135 */ 2117 */
2136 void rcu_barrier_sched(void) 2118 void rcu_barrier_sched(void)
2137 { 2119 {
2138 _rcu_barrier(&rcu_sched_state, call_rcu_sched); 2120 _rcu_barrier(&rcu_sched_state, call_rcu_sched);
2139 } 2121 }
2140 EXPORT_SYMBOL_GPL(rcu_barrier_sched); 2122 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
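[Editor's note] A typical (hypothetical) use of these barriers is a module-unload path: the module first ensures that no new callbacks can be posted, then waits for the callbacks already queued, and only then tears down the data those callbacks reference. The foo_* helpers below are placeholders, not part of this patch.

static void __exit foo_module_exit(void)
{
	foo_stop_posting_callbacks();	/* hypothetical: no further call_rcu_sched() */
	rcu_barrier_sched();		/* wait for already-queued callbacks to run */
	foo_free_remaining_objects();	/* hypothetical teardown */
}
module_exit(foo_module_exit);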
2141 2123
2142 /* 2124 /*
2143 * Do boot-time initialization of a CPU's per-CPU RCU data. 2125 * Do boot-time initialization of a CPU's per-CPU RCU data.
2144 */ 2126 */
2145 static void __init 2127 static void __init
2146 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 2128 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2147 { 2129 {
2148 unsigned long flags; 2130 unsigned long flags;
2149 int i; 2131 int i;
2150 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2132 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2151 struct rcu_node *rnp = rcu_get_root(rsp); 2133 struct rcu_node *rnp = rcu_get_root(rsp);
2152 2134
2153 /* Set up local state, ensuring consistent view of global state. */ 2135 /* Set up local state, ensuring consistent view of global state. */
2154 raw_spin_lock_irqsave(&rnp->lock, flags); 2136 raw_spin_lock_irqsave(&rnp->lock, flags);
2155 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 2137 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
2156 rdp->nxtlist = NULL; 2138 rdp->nxtlist = NULL;
2157 for (i = 0; i < RCU_NEXT_SIZE; i++) 2139 for (i = 0; i < RCU_NEXT_SIZE; i++)
2158 rdp->nxttail[i] = &rdp->nxtlist; 2140 rdp->nxttail[i] = &rdp->nxtlist;
2159 rdp->qlen = 0; 2141 rdp->qlen = 0;
2160 #ifdef CONFIG_NO_HZ 2142 #ifdef CONFIG_NO_HZ
2161 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2143 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2162 #endif /* #ifdef CONFIG_NO_HZ */ 2144 #endif /* #ifdef CONFIG_NO_HZ */
2163 rdp->cpu = cpu; 2145 rdp->cpu = cpu;
2164 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2146 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2165 } 2147 }
2166 2148
2167 /* 2149 /*
2168 * Initialize a CPU's per-CPU RCU data. Note that only one online or 2150 * Initialize a CPU's per-CPU RCU data. Note that only one online or
2169 * offline event can be happening at a given time. Note also that we 2151 * offline event can be happening at a given time. Note also that we
2170 * can accept some slop in the rsp->completed access due to the fact 2152 * can accept some slop in the rsp->completed access due to the fact
2171 * that this CPU cannot possibly have any RCU callbacks in flight yet. 2153 * that this CPU cannot possibly have any RCU callbacks in flight yet.
2172 */ 2154 */
2173 static void __cpuinit 2155 static void __cpuinit
2174 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 2156 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2175 { 2157 {
2176 unsigned long flags; 2158 unsigned long flags;
2177 unsigned long mask; 2159 unsigned long mask;
2178 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2160 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2179 struct rcu_node *rnp = rcu_get_root(rsp); 2161 struct rcu_node *rnp = rcu_get_root(rsp);
2180 2162
2181 /* Set up local state, ensuring consistent view of global state. */ 2163 /* Set up local state, ensuring consistent view of global state. */
2182 raw_spin_lock_irqsave(&rnp->lock, flags); 2164 raw_spin_lock_irqsave(&rnp->lock, flags);
2183 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 2165 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
2184 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 2166 rdp->qs_pending = 1; /* so set up to respond to current GP. */
2185 rdp->beenonline = 1; /* We have now been online. */ 2167 rdp->beenonline = 1; /* We have now been online. */
2186 rdp->preemptible = preemptible; 2168 rdp->preemptible = preemptible;
2187 rdp->qlen_last_fqs_check = 0; 2169 rdp->qlen_last_fqs_check = 0;
2188 rdp->n_force_qs_snap = rsp->n_force_qs; 2170 rdp->n_force_qs_snap = rsp->n_force_qs;
2189 rdp->blimit = blimit; 2171 rdp->blimit = blimit;
2190 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2172 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2191 2173
2192 /* 2174 /*
2193 * A new grace period might start here. If so, we won't be part 2175 * A new grace period might start here. If so, we won't be part
2194 * of it, but that is OK, as we are currently in a quiescent state. 2176 * of it, but that is OK, as we are currently in a quiescent state.
2195 */ 2177 */
2196 2178
2197 /* Exclude any attempts to start a new GP on large systems. */ 2179 /* Exclude any attempts to start a new GP on large systems. */
2198 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 2180 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
2199 2181
2200 /* Add CPU to rcu_node bitmasks. */ 2182 /* Add CPU to rcu_node bitmasks. */
2201 rnp = rdp->mynode; 2183 rnp = rdp->mynode;
2202 mask = rdp->grpmask; 2184 mask = rdp->grpmask;
2203 do { 2185 do {
2204 /* Exclude any attempts to start a new GP on small systems. */ 2186 /* Exclude any attempts to start a new GP on small systems. */
2205 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2187 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2206 rnp->qsmaskinit |= mask; 2188 rnp->qsmaskinit |= mask;
2207 mask = rnp->grpmask; 2189 mask = rnp->grpmask;
2208 if (rnp == rdp->mynode) { 2190 if (rnp == rdp->mynode) {
2209 rdp->gpnum = rnp->completed; /* if GP in progress... */ 2191 rdp->gpnum = rnp->completed; /* if GP in progress... */
2210 rdp->completed = rnp->completed; 2192 rdp->completed = rnp->completed;
2211 rdp->passed_quiesc_completed = rnp->completed - 1; 2193 rdp->passed_quiesc_completed = rnp->completed - 1;
2212 } 2194 }
2213 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 2195 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
2214 rnp = rnp->parent; 2196 rnp = rnp->parent;
2215 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 2197 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
2216 2198
2217 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2199 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
2218 } 2200 }
2219 2201
2220 static void __cpuinit rcu_online_cpu(int cpu) 2202 static void __cpuinit rcu_online_cpu(int cpu)
2221 { 2203 {
2222 rcu_init_percpu_data(cpu, &rcu_sched_state, 0); 2204 rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
2223 rcu_init_percpu_data(cpu, &rcu_bh_state, 0); 2205 rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
2224 rcu_preempt_init_percpu_data(cpu); 2206 rcu_preempt_init_percpu_data(cpu);
2225 } 2207 }
2226 2208
2227 static void __cpuinit rcu_online_kthreads(int cpu) 2209 static void __cpuinit rcu_online_kthreads(int cpu)
2228 { 2210 {
2229 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 2211 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2230 struct rcu_node *rnp = rdp->mynode; 2212 struct rcu_node *rnp = rdp->mynode;
2231 2213
2232 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 2214 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
2233 if (rcu_kthreads_spawnable) { 2215 if (rcu_kthreads_spawnable) {
2234 (void)rcu_spawn_one_cpu_kthread(cpu); 2216 (void)rcu_spawn_one_cpu_kthread(cpu);
2235 if (rnp->node_kthread_task == NULL) 2217 if (rnp->node_kthread_task == NULL)
2236 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 2218 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
2237 } 2219 }
2238 } 2220 }
2239 2221
2240 /* 2222 /*
2241 * Handle CPU online/offline notification events. 2223 * Handle CPU online/offline notification events.
2242 */ 2224 */
2243 static int __cpuinit rcu_cpu_notify(struct notifier_block *self, 2225 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2244 unsigned long action, void *hcpu) 2226 unsigned long action, void *hcpu)
2245 { 2227 {
2246 long cpu = (long)hcpu; 2228 long cpu = (long)hcpu;
2247 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 2229 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2248 struct rcu_node *rnp = rdp->mynode; 2230 struct rcu_node *rnp = rdp->mynode;
2249 2231
2250 switch (action) { 2232 switch (action) {
2251 case CPU_UP_PREPARE: 2233 case CPU_UP_PREPARE:
2252 case CPU_UP_PREPARE_FROZEN: 2234 case CPU_UP_PREPARE_FROZEN:
2253 rcu_online_cpu(cpu); 2235 rcu_online_cpu(cpu);
2254 rcu_online_kthreads(cpu); 2236 rcu_online_kthreads(cpu);
2255 break; 2237 break;
2256 case CPU_ONLINE: 2238 case CPU_ONLINE:
2257 case CPU_DOWN_FAILED: 2239 case CPU_DOWN_FAILED:
2258 rcu_node_kthread_setaffinity(rnp, -1); 2240 rcu_node_kthread_setaffinity(rnp, -1);
2259 rcu_cpu_kthread_setrt(cpu, 1); 2241 rcu_cpu_kthread_setrt(cpu, 1);
2260 break; 2242 break;
2261 case CPU_DOWN_PREPARE: 2243 case CPU_DOWN_PREPARE:
2262 rcu_node_kthread_setaffinity(rnp, cpu); 2244 rcu_node_kthread_setaffinity(rnp, cpu);
2263 rcu_cpu_kthread_setrt(cpu, 0); 2245 rcu_cpu_kthread_setrt(cpu, 0);
2264 break; 2246 break;
2265 case CPU_DYING: 2247 case CPU_DYING:
2266 case CPU_DYING_FROZEN: 2248 case CPU_DYING_FROZEN:
2267 /* 2249 /*
2268 * The whole machine is "stopped" except this CPU, so we can 2250 * The whole machine is "stopped" except this CPU, so we can
2269 * touch any data without introducing corruption. We send the 2251 * touch any data without introducing corruption. We send the
2270 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2252 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2271 */ 2253 */
2272 rcu_send_cbs_to_online(&rcu_bh_state); 2254 rcu_send_cbs_to_online(&rcu_bh_state);
2273 rcu_send_cbs_to_online(&rcu_sched_state); 2255 rcu_send_cbs_to_online(&rcu_sched_state);
2274 rcu_preempt_send_cbs_to_online(); 2256 rcu_preempt_send_cbs_to_online();
2275 break; 2257 break;
2276 case CPU_DEAD: 2258 case CPU_DEAD:
2277 case CPU_DEAD_FROZEN: 2259 case CPU_DEAD_FROZEN:
2278 case CPU_UP_CANCELED: 2260 case CPU_UP_CANCELED:
2279 case CPU_UP_CANCELED_FROZEN: 2261 case CPU_UP_CANCELED_FROZEN:
2280 rcu_offline_cpu(cpu); 2262 rcu_offline_cpu(cpu);
2281 break; 2263 break;
2282 default: 2264 default:
2283 break; 2265 break;
2284 } 2266 }
2285 return NOTIFY_OK; 2267 return NOTIFY_OK;
2286 } 2268 }
2287 2269
2288 /* 2270 /*
2289 * This function is invoked towards the end of the scheduler's initialization 2271 * This function is invoked towards the end of the scheduler's initialization
2290 * process. Before this is called, the idle task might contain 2272 * process. Before this is called, the idle task might contain
2291 * RCU read-side critical sections (during which time, this idle 2273 * RCU read-side critical sections (during which time, this idle
2292 * task is booting the system). After this function is called, the 2274 * task is booting the system). After this function is called, the
2293 * idle tasks are prohibited from containing RCU read-side critical 2275 * idle tasks are prohibited from containing RCU read-side critical
2294 * sections. This function also enables RCU lockdep checking. 2276 * sections. This function also enables RCU lockdep checking.
2295 */ 2277 */
2296 void rcu_scheduler_starting(void) 2278 void rcu_scheduler_starting(void)
2297 { 2279 {
2298 WARN_ON(num_online_cpus() != 1); 2280 WARN_ON(num_online_cpus() != 1);
2299 WARN_ON(nr_context_switches() > 0); 2281 WARN_ON(nr_context_switches() > 0);
2300 rcu_scheduler_active = 1; 2282 rcu_scheduler_active = 1;
2301 } 2283 }
2302 2284
2303 /* 2285 /*
2304 * Compute the per-level fanout, either using the exact fanout specified 2286 * Compute the per-level fanout, either using the exact fanout specified
2305 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 2287 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
2306 */ 2288 */
2307 #ifdef CONFIG_RCU_FANOUT_EXACT 2289 #ifdef CONFIG_RCU_FANOUT_EXACT
2308 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2290 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2309 { 2291 {
2310 int i; 2292 int i;
2311 2293
2312 for (i = NUM_RCU_LVLS - 1; i > 0; i--) 2294 for (i = NUM_RCU_LVLS - 1; i > 0; i--)
2313 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2295 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2314 rsp->levelspread[0] = RCU_FANOUT_LEAF; 2296 rsp->levelspread[0] = RCU_FANOUT_LEAF;
2315 } 2297 }
2316 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2298 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2317 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2299 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2318 { 2300 {
2319 int ccur; 2301 int ccur;
2320 int cprv; 2302 int cprv;
2321 int i; 2303 int i;
2322 2304
2323 cprv = NR_CPUS; 2305 cprv = NR_CPUS;
2324 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2306 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
2325 ccur = rsp->levelcnt[i]; 2307 ccur = rsp->levelcnt[i];
2326 rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 2308 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
2327 cprv = ccur; 2309 cprv = ccur;
2328 } 2310 }
2329 } 2311 }
2330 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ 2312 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
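[Editor's note] A worked example of the balancing variant, assuming NR_CPUS=64 and CONFIG_RCU_FANOUT=16, which yields a two-level tree with levelcnt = {1, 4}: the first pass has cprv = 64 and ccur = 4, so levelspread[1] = (64 + 4 - 1) / 4 = 16; the second pass has cprv = 4 and ccur = 1, so levelspread[0] = (4 + 1 - 1) / 1 = 4. Each leaf rcu_node therefore covers up to 16 CPUs, and the root covers the 4 leaves.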
2331 2313
2332 /* 2314 /*
2333 * Helper function for rcu_init() that initializes one rcu_state structure. 2315 * Helper function for rcu_init() that initializes one rcu_state structure.
2334 */ 2316 */
2335 static void __init rcu_init_one(struct rcu_state *rsp, 2317 static void __init rcu_init_one(struct rcu_state *rsp,
2336 struct rcu_data __percpu *rda) 2318 struct rcu_data __percpu *rda)
2337 { 2319 {
2338 static char *buf[] = { "rcu_node_level_0", 2320 static char *buf[] = { "rcu_node_level_0",
2339 "rcu_node_level_1", 2321 "rcu_node_level_1",
2340 "rcu_node_level_2", 2322 "rcu_node_level_2",
2341 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ 2323 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
2342 int cpustride = 1; 2324 int cpustride = 1;
2343 int i; 2325 int i;
2344 int j; 2326 int j;
2345 struct rcu_node *rnp; 2327 struct rcu_node *rnp;
2346 2328
2347 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 2329 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2348 2330
2349 /* Initialize the level-tracking arrays. */ 2331 /* Initialize the level-tracking arrays. */
2350 2332
2351 for (i = 1; i < NUM_RCU_LVLS; i++) 2333 for (i = 1; i < NUM_RCU_LVLS; i++)
2352 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; 2334 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
2353 rcu_init_levelspread(rsp); 2335 rcu_init_levelspread(rsp);
2354 2336
2355 /* Initialize the elements themselves, starting from the leaves. */ 2337 /* Initialize the elements themselves, starting from the leaves. */
2356 2338
2357 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2339 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
2358 cpustride *= rsp->levelspread[i]; 2340 cpustride *= rsp->levelspread[i];
2359 rnp = rsp->level[i]; 2341 rnp = rsp->level[i];
2360 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 2342 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
2361 raw_spin_lock_init(&rnp->lock); 2343 raw_spin_lock_init(&rnp->lock);
2362 lockdep_set_class_and_name(&rnp->lock, 2344 lockdep_set_class_and_name(&rnp->lock,
2363 &rcu_node_class[i], buf[i]); 2345 &rcu_node_class[i], buf[i]);
2364 rnp->gpnum = 0; 2346 rnp->gpnum = 0;
2365 rnp->qsmask = 0; 2347 rnp->qsmask = 0;
2366 rnp->qsmaskinit = 0; 2348 rnp->qsmaskinit = 0;
2367 rnp->grplo = j * cpustride; 2349 rnp->grplo = j * cpustride;
2368 rnp->grphi = (j + 1) * cpustride - 1; 2350 rnp->grphi = (j + 1) * cpustride - 1;
2369 if (rnp->grphi >= NR_CPUS) 2351 if (rnp->grphi >= NR_CPUS)
2370 rnp->grphi = NR_CPUS - 1; 2352 rnp->grphi = NR_CPUS - 1;
2371 if (i == 0) { 2353 if (i == 0) {
2372 rnp->grpnum = 0; 2354 rnp->grpnum = 0;
2373 rnp->grpmask = 0; 2355 rnp->grpmask = 0;
2374 rnp->parent = NULL; 2356 rnp->parent = NULL;
2375 } else { 2357 } else {
2376 rnp->grpnum = j % rsp->levelspread[i - 1]; 2358 rnp->grpnum = j % rsp->levelspread[i - 1];
2377 rnp->grpmask = 1UL << rnp->grpnum; 2359 rnp->grpmask = 1UL << rnp->grpnum;
2378 rnp->parent = rsp->level[i - 1] + 2360 rnp->parent = rsp->level[i - 1] +
2379 j / rsp->levelspread[i - 1]; 2361 j / rsp->levelspread[i - 1];
2380 } 2362 }
2381 rnp->level = i; 2363 rnp->level = i;
2382 INIT_LIST_HEAD(&rnp->blkd_tasks); 2364 INIT_LIST_HEAD(&rnp->blkd_tasks);
2383 } 2365 }
2384 } 2366 }
2385 2367
2386 rsp->rda = rda; 2368 rsp->rda = rda;
2387 rnp = rsp->level[NUM_RCU_LVLS - 1]; 2369 rnp = rsp->level[NUM_RCU_LVLS - 1];
2388 for_each_possible_cpu(i) { 2370 for_each_possible_cpu(i) {
2389 while (i > rnp->grphi) 2371 while (i > rnp->grphi)
2390 rnp++; 2372 rnp++;
2391 per_cpu_ptr(rsp->rda, i)->mynode = rnp; 2373 per_cpu_ptr(rsp->rda, i)->mynode = rnp;
2392 rcu_boot_init_percpu_data(i, rsp); 2374 rcu_boot_init_percpu_data(i, rsp);
2393 } 2375 }
2394 } 2376 }
2395 2377
2396 void __init rcu_init(void) 2378 void __init rcu_init(void)
2397 { 2379 {
2398 int cpu; 2380 int cpu;
2399 2381
2400 rcu_bootup_announce(); 2382 rcu_bootup_announce();
2401 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2383 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2402 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2384 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2403 __rcu_init_preempt(); 2385 __rcu_init_preempt();
2404 2386
2405 /* 2387 /*
2406 * We don't need protection against CPU-hotplug here because 2388 * We don't need protection against CPU-hotplug here because
2407 * this is called early in boot, before either interrupts 2389 * this is called early in boot, before either interrupts
2408 * or the scheduler are operational. 2390 * or the scheduler are operational.
2409 */ 2391 */
2410 cpu_notifier(rcu_cpu_notify, 0); 2392 cpu_notifier(rcu_cpu_notify, 0);
2411 for_each_online_cpu(cpu) 2393 for_each_online_cpu(cpu)
2412 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 2394 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
2413 check_cpu_stall_init(); 2395 check_cpu_stall_init();
2414 } 2396 }
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions. 3 * Internal non-public definitions.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details. 13 * GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * 18 *
19 * Copyright IBM Corporation, 2008 19 * Copyright IBM Corporation, 2008
20 * 20 *
21 * Author: Ingo Molnar <mingo@elte.hu> 21 * Author: Ingo Molnar <mingo@elte.hu>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25 #include <linux/cache.h> 25 #include <linux/cache.h>
26 #include <linux/spinlock.h> 26 #include <linux/spinlock.h>
27 #include <linux/threads.h> 27 #include <linux/threads.h>
28 #include <linux/cpumask.h> 28 #include <linux/cpumask.h>
29 #include <linux/seqlock.h> 29 #include <linux/seqlock.h>
30 30
31 /* 31 /*
32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. 32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
33 * In theory, it should be possible to add more levels straightforwardly. 33 * In theory, it should be possible to add more levels straightforwardly.
34 * In practice, this did work well going from three levels to four. 34 * In practice, this did work well going from three levels to four.
35 * Of course, your mileage may vary. 35 * Of course, your mileage may vary.
36 */ 36 */
37 #define MAX_RCU_LVLS 4 37 #define MAX_RCU_LVLS 4
38 #if CONFIG_RCU_FANOUT > 16 38 #if CONFIG_RCU_FANOUT > 16
39 #define RCU_FANOUT_LEAF 16 39 #define RCU_FANOUT_LEAF 16
40 #else /* #if CONFIG_RCU_FANOUT > 16 */ 40 #else /* #if CONFIG_RCU_FANOUT > 16 */
41 #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) 41 #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
42 #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ 42 #endif /* #else #if CONFIG_RCU_FANOUT > 16 */
43 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) 43 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
44 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 44 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
45 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 45 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
46 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 46 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
47 47
48 #if NR_CPUS <= RCU_FANOUT_1 48 #if NR_CPUS <= RCU_FANOUT_1
49 # define NUM_RCU_LVLS 1 49 # define NUM_RCU_LVLS 1
50 # define NUM_RCU_LVL_0 1 50 # define NUM_RCU_LVL_0 1
51 # define NUM_RCU_LVL_1 (NR_CPUS) 51 # define NUM_RCU_LVL_1 (NR_CPUS)
52 # define NUM_RCU_LVL_2 0 52 # define NUM_RCU_LVL_2 0
53 # define NUM_RCU_LVL_3 0 53 # define NUM_RCU_LVL_3 0
54 # define NUM_RCU_LVL_4 0 54 # define NUM_RCU_LVL_4 0
55 #elif NR_CPUS <= RCU_FANOUT_2 55 #elif NR_CPUS <= RCU_FANOUT_2
56 # define NUM_RCU_LVLS 2 56 # define NUM_RCU_LVLS 2
57 # define NUM_RCU_LVL_0 1 57 # define NUM_RCU_LVL_0 1
58 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 58 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
59 # define NUM_RCU_LVL_2 (NR_CPUS) 59 # define NUM_RCU_LVL_2 (NR_CPUS)
60 # define NUM_RCU_LVL_3 0 60 # define NUM_RCU_LVL_3 0
61 # define NUM_RCU_LVL_4 0 61 # define NUM_RCU_LVL_4 0
62 #elif NR_CPUS <= RCU_FANOUT_3 62 #elif NR_CPUS <= RCU_FANOUT_3
63 # define NUM_RCU_LVLS 3 63 # define NUM_RCU_LVLS 3
64 # define NUM_RCU_LVL_0 1 64 # define NUM_RCU_LVL_0 1
65 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 65 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
66 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 66 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
67 # define NUM_RCU_LVL_3 (NR_CPUS) 67 # define NUM_RCU_LVL_3 (NR_CPUS)
68 # define NUM_RCU_LVL_4 0 68 # define NUM_RCU_LVL_4 0
69 #elif NR_CPUS <= RCU_FANOUT_4 69 #elif NR_CPUS <= RCU_FANOUT_4
70 # define NUM_RCU_LVLS 4 70 # define NUM_RCU_LVLS 4
71 # define NUM_RCU_LVL_0 1 71 # define NUM_RCU_LVL_0 1
72 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) 72 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
73 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 73 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
74 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 74 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
75 # define NUM_RCU_LVL_4 (NR_CPUS) 75 # define NUM_RCU_LVL_4 (NR_CPUS)
76 #else 76 #else
77 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" 77 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
78 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ 78 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
79 79
80 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) 80 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
81 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 81 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
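[Editor's note] A worked example of this shape computation, again assuming NR_CPUS=64 and CONFIG_RCU_FANOUT=16: RCU_FANOUT_1 = 16 and RCU_FANOUT_2 = 256, so the NR_CPUS <= RCU_FANOUT_2 branch applies, giving NUM_RCU_LVLS = 2, NUM_RCU_LVL_0 = 1, NUM_RCU_LVL_1 = DIV_ROUND_UP(64, 16) = 4, and NUM_RCU_LVL_2 = 64. Then RCU_SUM = 1 + 4 + 64 = 69 and NUM_RCU_NODES = 69 - 64 = 5, that is, one root rcu_node plus four leaf rcu_node structures.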
82 82
83 /* 83 /*
84 * Dynticks per-CPU state. 84 * Dynticks per-CPU state.
85 */ 85 */
86 struct rcu_dynticks { 86 struct rcu_dynticks {
87 int dynticks_nesting; /* Track irq/process nesting level. */ 87 int dynticks_nesting; /* Track irq/process nesting level. */
88 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 88 int dynticks_nmi_nesting; /* Track NMI nesting level. */
89 atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ 89 atomic_t dynticks; /* Even value for dynticks-idle, else odd. */
90 }; 90 };
91 91
92 /* RCU's kthread states for tracing. */ 92 /* RCU's kthread states for tracing. */
93 #define RCU_KTHREAD_STOPPED 0 93 #define RCU_KTHREAD_STOPPED 0
94 #define RCU_KTHREAD_RUNNING 1 94 #define RCU_KTHREAD_RUNNING 1
95 #define RCU_KTHREAD_WAITING 2 95 #define RCU_KTHREAD_WAITING 2
96 #define RCU_KTHREAD_OFFCPU 3 96 #define RCU_KTHREAD_OFFCPU 3
97 #define RCU_KTHREAD_YIELDING 4 97 #define RCU_KTHREAD_YIELDING 4
98 #define RCU_KTHREAD_MAX 4 98 #define RCU_KTHREAD_MAX 4
99 99
100 /* 100 /*
101 * Definition for node within the RCU grace-period-detection hierarchy. 101 * Definition for node within the RCU grace-period-detection hierarchy.
102 */ 102 */
103 struct rcu_node { 103 struct rcu_node {
104 raw_spinlock_t lock; /* Root rcu_node's lock protects some */ 104 raw_spinlock_t lock; /* Root rcu_node's lock protects some */
105 /* rcu_state fields as well as following. */ 105 /* rcu_state fields as well as following. */
106 unsigned long gpnum; /* Current grace period for this node. */ 106 unsigned long gpnum; /* Current grace period for this node. */
107 /* This will either be equal to or one */ 107 /* This will either be equal to or one */
108 /* behind the root rcu_node's gpnum. */ 108 /* behind the root rcu_node's gpnum. */
109 unsigned long completed; /* Last GP completed for this node. */ 109 unsigned long completed; /* Last GP completed for this node. */
110 /* This will either be equal to or one */ 110 /* This will either be equal to or one */
111 /* behind the root rcu_node's gpnum. */ 111 /* behind the root rcu_node's gpnum. */
112 unsigned long qsmask; /* CPUs or groups that need to switch in */ 112 unsigned long qsmask; /* CPUs or groups that need to switch in */
113 /* order for current grace period to proceed.*/ 113 /* order for current grace period to proceed.*/
114 /* In leaf rcu_node, each bit corresponds to */ 114 /* In leaf rcu_node, each bit corresponds to */
115 /* an rcu_data structure, otherwise, each */ 115 /* an rcu_data structure, otherwise, each */
116 /* bit corresponds to a child rcu_node */ 116 /* bit corresponds to a child rcu_node */
117 /* structure. */ 117 /* structure. */
118 unsigned long expmask; /* Groups that have ->blkd_tasks */ 118 unsigned long expmask; /* Groups that have ->blkd_tasks */
119 /* elements that need to drain to allow the */ 119 /* elements that need to drain to allow the */
120 /* current expedited grace period to */ 120 /* current expedited grace period to */
121 /* complete (only for TREE_PREEMPT_RCU). */ 121 /* complete (only for TREE_PREEMPT_RCU). */
122 unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ 122 unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */
123 unsigned long qsmaskinit; 123 unsigned long qsmaskinit;
124 /* Per-GP initial value for qsmask & expmask. */ 124 /* Per-GP initial value for qsmask & expmask. */
125 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 125 unsigned long grpmask; /* Mask to apply to parent qsmask. */
126 /* Only one bit will be set in this mask. */ 126 /* Only one bit will be set in this mask. */
127 int grplo; /* lowest-numbered CPU or group here. */ 127 int grplo; /* lowest-numbered CPU or group here. */
128 int grphi; /* highest-numbered CPU or group here. */ 128 int grphi; /* highest-numbered CPU or group here. */
129 u8 grpnum; /* CPU/group number for next level up. */ 129 u8 grpnum; /* CPU/group number for next level up. */
130 u8 level; /* root is at level 0. */ 130 u8 level; /* root is at level 0. */
131 struct rcu_node *parent; 131 struct rcu_node *parent;
132 struct list_head blkd_tasks; 132 struct list_head blkd_tasks;
133 /* Tasks blocked in RCU read-side critical */ 133 /* Tasks blocked in RCU read-side critical */
134 /* section. Tasks are placed at the head */ 134 /* section. Tasks are placed at the head */
135 /* of this list and age towards the tail. */ 135 /* of this list and age towards the tail. */
136 struct list_head *gp_tasks; 136 struct list_head *gp_tasks;
137 /* Pointer to the first task blocking the */ 137 /* Pointer to the first task blocking the */
138 /* current grace period, or NULL if there */ 138 /* current grace period, or NULL if there */
139 /* is no such task. */ 139 /* is no such task. */
140 struct list_head *exp_tasks; 140 struct list_head *exp_tasks;
141 /* Pointer to the first task blocking the */ 141 /* Pointer to the first task blocking the */
142 /* current expedited grace period, or NULL */ 142 /* current expedited grace period, or NULL */
143 /* if there is no such task. If there */ 143 /* if there is no such task. If there */
144 /* is no current expedited grace period, */ 144 /* is no current expedited grace period, */
145 /* then there cannot be any such task. */ 145 /* then there cannot be any such task. */
146 #ifdef CONFIG_RCU_BOOST 146 #ifdef CONFIG_RCU_BOOST
147 struct list_head *boost_tasks; 147 struct list_head *boost_tasks;
148 /* Pointer to first task that needs to be */ 148 /* Pointer to first task that needs to be */
149 /* priority boosted, or NULL if no priority */ 149 /* priority boosted, or NULL if no priority */
150 /* boosting is needed for this rcu_node */ 150 /* boosting is needed for this rcu_node */
151 /* structure. If there are no tasks */ 151 /* structure. If there are no tasks */
152 /* queued on this rcu_node structure that */ 152 /* queued on this rcu_node structure that */
153 /* are blocking the current grace period, */ 153 /* are blocking the current grace period, */
154 /* there can be no such task. */ 154 /* there can be no such task. */
155 unsigned long boost_time; 155 unsigned long boost_time;
156 /* When to start boosting (jiffies). */ 156 /* When to start boosting (jiffies). */
157 struct task_struct *boost_kthread_task; 157 struct task_struct *boost_kthread_task;
158 /* kthread that takes care of priority */ 158 /* kthread that takes care of priority */
159 /* boosting for this rcu_node structure. */ 159 /* boosting for this rcu_node structure. */
160 wait_queue_head_t boost_wq; 160 wait_queue_head_t boost_wq;
161 /* Wait queue on which to park the boost */ 161 /* Wait queue on which to park the boost */
162 /* kthread. */ 162 /* kthread. */
163 unsigned int boost_kthread_status; 163 unsigned int boost_kthread_status;
164 /* State of boost_kthread_task for tracing. */ 164 /* State of boost_kthread_task for tracing. */
165 unsigned long n_tasks_boosted; 165 unsigned long n_tasks_boosted;
166 /* Total number of tasks boosted. */ 166 /* Total number of tasks boosted. */
167 unsigned long n_exp_boosts; 167 unsigned long n_exp_boosts;
168 /* Number of tasks boosted for expedited GP. */ 168 /* Number of tasks boosted for expedited GP. */
169 unsigned long n_normal_boosts; 169 unsigned long n_normal_boosts;
170 /* Number of tasks boosted for normal GP. */ 170 /* Number of tasks boosted for normal GP. */
171 unsigned long n_balk_blkd_tasks; 171 unsigned long n_balk_blkd_tasks;
172 /* Refused to boost: no blocked tasks. */ 172 /* Refused to boost: no blocked tasks. */
173 unsigned long n_balk_exp_gp_tasks; 173 unsigned long n_balk_exp_gp_tasks;
174 /* Refused to boost: nothing blocking GP. */ 174 /* Refused to boost: nothing blocking GP. */
175 unsigned long n_balk_boost_tasks; 175 unsigned long n_balk_boost_tasks;
176 /* Refused to boost: already boosting. */ 176 /* Refused to boost: already boosting. */
177 unsigned long n_balk_notblocked; 177 unsigned long n_balk_notblocked;
178 /* Refused to boost: RCU RS CS still running. */ 178 /* Refused to boost: RCU RS CS still running. */
179 unsigned long n_balk_notyet; 179 unsigned long n_balk_notyet;
180 /* Refused to boost: not yet time. */ 180 /* Refused to boost: not yet time. */
181 unsigned long n_balk_nos; 181 unsigned long n_balk_nos;
182 /* Refused to boost: not sure why, though. */ 182 /* Refused to boost: not sure why, though. */
183 /* This can happen due to race conditions. */ 183 /* This can happen due to race conditions. */
184 #endif /* #ifdef CONFIG_RCU_BOOST */ 184 #endif /* #ifdef CONFIG_RCU_BOOST */
185 struct task_struct *node_kthread_task; 185 struct task_struct *node_kthread_task;
186 /* kthread that takes care of this rcu_node */ 186 /* kthread that takes care of this rcu_node */
187 /* structure, for example, awakening the */ 187 /* structure, for example, awakening the */
188 /* per-CPU kthreads as needed. */ 188 /* per-CPU kthreads as needed. */
189 wait_queue_head_t node_wq; 189 wait_queue_head_t node_wq;
190 /* Wait queue on which to park the per-node */ 190 /* Wait queue on which to park the per-node */
191 /* kthread. */ 191 /* kthread. */
192 unsigned int node_kthread_status; 192 unsigned int node_kthread_status;
193 /* State of node_kthread_task for tracing. */ 193 /* State of node_kthread_task for tracing. */
194 } ____cacheline_internodealigned_in_smp; 194 } ____cacheline_internodealigned_in_smp;
195 195
196 /* 196 /*
197 * Do a full breadth-first scan of the rcu_node structures for the 197 * Do a full breadth-first scan of the rcu_node structures for the
198 * specified rcu_state structure. 198 * specified rcu_state structure.
199 */ 199 */
200 #define rcu_for_each_node_breadth_first(rsp, rnp) \ 200 #define rcu_for_each_node_breadth_first(rsp, rnp) \
201 for ((rnp) = &(rsp)->node[0]; \ 201 for ((rnp) = &(rsp)->node[0]; \
202 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 202 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
203 203
204 /* 204 /*
205 * Do a breadth-first scan of the non-leaf rcu_node structures for the 205 * Do a breadth-first scan of the non-leaf rcu_node structures for the
206 * specified rcu_state structure. Note that if there is a singleton 206 * specified rcu_state structure. Note that if there is a singleton
207 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 207 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
208 */ 208 */
209 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 209 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
210 for ((rnp) = &(rsp)->node[0]; \ 210 for ((rnp) = &(rsp)->node[0]; \
211 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) 211 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
212 212
213 /* 213 /*
214 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 214 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
215 * structure. Note that if there is a singleton rcu_node tree with but 215 * structure. Note that if there is a singleton rcu_node tree with but
216 * one rcu_node structure, this loop -will- visit the rcu_node structure. 216 * one rcu_node structure, this loop -will- visit the rcu_node structure.
217 * It is still a leaf node, even if it is also the root node. 217 * It is still a leaf node, even if it is also the root node.
218 */ 218 */
219 #define rcu_for_each_leaf_node(rsp, rnp) \ 219 #define rcu_for_each_leaf_node(rsp, rnp) \
220 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 220 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
221 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 221 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
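
The three traversal macros above all walk the same dense ->node[] array, relying on ->level[] to mark where each level begins. Below is a minimal, stand-alone user-space sketch of that layout and of full versus leaf-only iteration; the sketch_* names, node count, and CPU ranges are invented for illustration and are not kernel code.

#include <stdio.h>

#define SKETCH_NUM_LVLS		2
#define SKETCH_NUM_NODES	5	/* 1 root + 4 leaves */

struct sketch_node { int grplo, grphi; };

struct sketch_state {
	struct sketch_node node[SKETCH_NUM_NODES];
	struct sketch_node *level[SKETCH_NUM_LVLS];
};

/* Same shape as rcu_for_each_node_breadth_first(): scan the whole array. */
#define sketch_for_each_node(sp, np) \
	for ((np) = &(sp)->node[0]; \
	     (np) < &(sp)->node[SKETCH_NUM_NODES]; (np)++)

/* Same shape as rcu_for_each_leaf_node(): start at the last level. */
#define sketch_for_each_leaf(sp, np) \
	for ((np) = (sp)->level[SKETCH_NUM_LVLS - 1]; \
	     (np) < &(sp)->node[SKETCH_NUM_NODES]; (np)++)

int main(void)
{
	struct sketch_state s = {
		.node = {
			{ 0, 15 },			/* root covers CPUs 0-15 */
			{ 0, 3 }, { 4, 7 }, { 8, 11 }, { 12, 15 },
		},
	};
	struct sketch_node *np;
	int n = 0;

	s.level[0] = &s.node[0];	/* root level */
	s.level[1] = &s.node[1];	/* leaf level starts right after the root */

	sketch_for_each_node(&s, np)
		n++;
	printf("%d nodes total\n", n);
	sketch_for_each_leaf(&s, np)
		printf("leaf covers CPUs %d-%d\n", np->grplo, np->grphi);
	return 0;
}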
222 222
223 /* Index values for nxttail array in struct rcu_data. */ 223 /* Index values for nxttail array in struct rcu_data. */
224 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 224 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
225 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ 225 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
226 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ 226 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
227 #define RCU_NEXT_TAIL 3 227 #define RCU_NEXT_TAIL 3
228 #define RCU_NEXT_SIZE 4 228 #define RCU_NEXT_SIZE 4
229 229
230 /* Per-CPU data for read-copy update. */ 230 /* Per-CPU data for read-copy update. */
231 struct rcu_data { 231 struct rcu_data {
232 /* 1) quiescent-state and grace-period handling : */ 232 /* 1) quiescent-state and grace-period handling : */
233 unsigned long completed; /* Track rsp->completed gp number */ 233 unsigned long completed; /* Track rsp->completed gp number */
234 /* in order to detect GP end. */ 234 /* in order to detect GP end. */
235 unsigned long gpnum; /* Highest gp number that this CPU */ 235 unsigned long gpnum; /* Highest gp number that this CPU */
236 /* is aware of having started. */ 236 /* is aware of having started. */
237 unsigned long passed_quiesc_completed; 237 unsigned long passed_quiesc_completed;
238 /* Value of completed at time of qs. */ 238 /* Value of completed at time of qs. */
239 bool passed_quiesc; /* User-mode/idle loop etc. */ 239 bool passed_quiesc; /* User-mode/idle loop etc. */
240 bool qs_pending; /* Core waits for quiesc state. */ 240 bool qs_pending; /* Core waits for quiesc state. */
241 bool beenonline; /* CPU online at least once. */ 241 bool beenonline; /* CPU online at least once. */
242 bool preemptible; /* Preemptible RCU? */ 242 bool preemptible; /* Preemptible RCU? */
243 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 243 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
244 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 244 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
245 245
246 /* 2) batch handling */ 246 /* 2) batch handling */
247 /* 247 /*
248 * If nxtlist is not NULL, it is partitioned as follows. 248 * If nxtlist is not NULL, it is partitioned as follows.
249 * Any of the partitions might be empty, in which case the 249 * Any of the partitions might be empty, in which case the
250 * pointer to that partition will be equal to the pointer for 250 * pointer to that partition will be equal to the pointer for
251 * the following partition. When the list is empty, all of 251 * the following partition. When the list is empty, all of
252 * the nxttail elements point to the ->nxtlist pointer itself, 252 * the nxttail elements point to the ->nxtlist pointer itself,
253 * which in that case is NULL. 253 * which in that case is NULL.
254 * 254 *
255 * [nxtlist, *nxttail[RCU_DONE_TAIL]): 255 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
256 * Entries that batch # <= ->completed 256 * Entries that batch # <= ->completed
257 * The grace period for these entries has completed, and 257 * The grace period for these entries has completed, and
258 * the other grace-period-completed entries may be moved 258 * the other grace-period-completed entries may be moved
259 * here temporarily in rcu_process_callbacks(). 259 * here temporarily in rcu_process_callbacks().
260 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): 260 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
261 * Entries that batch # <= ->completed - 1: waiting for current GP 261 * Entries that batch # <= ->completed - 1: waiting for current GP
262 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): 262 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
263 * Entries known to have arrived before current GP ended 263 * Entries known to have arrived before current GP ended
264 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): 264 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
265 * Entries that might have arrived after current GP ended 265 * Entries that might have arrived after current GP ended
266 * Note that the value of *nxttail[RCU_NEXT_TAIL] will 266 * Note that the value of *nxttail[RCU_NEXT_TAIL] will
267 * always be NULL, as this is the end of the list. 267 * always be NULL, as this is the end of the list.
268 */ 268 */
269 struct rcu_head *nxtlist; 269 struct rcu_head *nxtlist;
270 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 270 struct rcu_head **nxttail[RCU_NEXT_SIZE];
271 long qlen; /* # of queued callbacks */ 271 long qlen; /* # of queued callbacks */
272 long qlen_last_fqs_check; 272 long qlen_last_fqs_check;
273 /* qlen at last check for QS forcing */ 273 /* qlen at last check for QS forcing */
274 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 274 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
275 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ 275 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
276 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ 276 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
277 unsigned long n_force_qs_snap; 277 unsigned long n_force_qs_snap;
278 /* did other CPU force QS recently? */ 278 /* did other CPU force QS recently? */
279 long blimit; /* Upper limit on a processed batch */ 279 long blimit; /* Upper limit on a processed batch */
280 280
281 #ifdef CONFIG_NO_HZ 281 #ifdef CONFIG_NO_HZ
282 /* 3) dynticks interface. */ 282 /* 3) dynticks interface. */
283 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 283 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
284 int dynticks_snap; /* Per-GP tracking for dynticks. */ 284 int dynticks_snap; /* Per-GP tracking for dynticks. */
285 #endif /* #ifdef CONFIG_NO_HZ */ 285 #endif /* #ifdef CONFIG_NO_HZ */
286 286
287 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 287 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
288 #ifdef CONFIG_NO_HZ 288 #ifdef CONFIG_NO_HZ
289 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 289 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
290 #endif /* #ifdef CONFIG_NO_HZ */ 290 #endif /* #ifdef CONFIG_NO_HZ */
291 unsigned long offline_fqs; /* Kicked due to being offline. */ 291 unsigned long offline_fqs; /* Kicked due to being offline. */
292 unsigned long resched_ipi; /* Sent a resched IPI. */ 292 unsigned long resched_ipi; /* Sent a resched IPI. */
293 293
294 /* 5) __rcu_pending() statistics. */ 294 /* 5) __rcu_pending() statistics. */
295 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ 295 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
296 unsigned long n_rp_qs_pending; 296 unsigned long n_rp_qs_pending;
297 unsigned long n_rp_report_qs; 297 unsigned long n_rp_report_qs;
298 unsigned long n_rp_cb_ready; 298 unsigned long n_rp_cb_ready;
299 unsigned long n_rp_cpu_needs_gp; 299 unsigned long n_rp_cpu_needs_gp;
300 unsigned long n_rp_gp_completed; 300 unsigned long n_rp_gp_completed;
301 unsigned long n_rp_gp_started; 301 unsigned long n_rp_gp_started;
302 unsigned long n_rp_need_fqs; 302 unsigned long n_rp_need_fqs;
303 unsigned long n_rp_need_nothing; 303 unsigned long n_rp_need_nothing;
304 304
305 int cpu; 305 int cpu;
306 }; 306 };
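
The comment inside struct rcu_data above describes how ->nxtlist and ->nxttail[] partition one singly linked callback list into four segments by keeping an array of tail pointers. The following stand-alone user-space sketch shows that idea in isolation; the cblist/cb names and the grossly simplified "advance" step are invented for illustration and are not the kernel's actual callback processing.

#include <stdio.h>

struct cb { struct cb *next; int id; };

enum { S_DONE, S_WAIT, S_NEXT_READY, S_NEXT, S_NSEGS };

struct cblist {
	struct cb *head;		/* plays the role of ->nxtlist */
	struct cb **tail[S_NSEGS];	/* plays the role of ->nxttail[] */
};

static void cblist_init(struct cblist *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i < S_NSEGS; i++)
		l->tail[i] = &l->head;	/* empty list: every tail points at head */
}

/* Newly queued callbacks always land in the last (RCU_NEXT-like) segment. */
static void cblist_enqueue(struct cblist *l, struct cb *c)
{
	c->next = NULL;
	*l->tail[S_NEXT] = c;
	l->tail[S_NEXT] = &c->next;
}

/* Grossly simplified: pretend enough grace periods have elapsed that
 * everything queued so far now sits in the "done" segment. */
static void cblist_advance_all(struct cblist *l)
{
	l->tail[S_DONE] = l->tail[S_WAIT] =
		l->tail[S_NEXT_READY] = l->tail[S_NEXT];
}

int main(void)
{
	struct cblist l;
	struct cb a = { .id = 1 }, b = { .id = 2 };
	struct cb *c;

	cblist_init(&l);
	cblist_enqueue(&l, &a);
	cblist_enqueue(&l, &b);
	cblist_advance_all(&l);
	for (c = l.head; c != *l.tail[S_DONE]; c = c->next)
		printf("callback %d is ready to invoke\n", c->id);
	return 0;
}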
307 307
308 /* Values for signaled field in struct rcu_state. */ 308 /* Values for signaled field in struct rcu_state. */
309 #define RCU_GP_IDLE 0 /* No grace period in progress. */ 309 #define RCU_GP_IDLE 0 /* No grace period in progress. */
310 #define RCU_GP_INIT 1 /* Grace period being initialized. */ 310 #define RCU_GP_INIT 1 /* Grace period being initialized. */
311 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ 311 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
312 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 312 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */
313 #ifdef CONFIG_NO_HZ 313 #ifdef CONFIG_NO_HZ
314 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 314 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
315 #else /* #ifdef CONFIG_NO_HZ */ 315 #else /* #ifdef CONFIG_NO_HZ */
316 #define RCU_SIGNAL_INIT RCU_FORCE_QS 316 #define RCU_SIGNAL_INIT RCU_FORCE_QS
317 #endif /* #else #ifdef CONFIG_NO_HZ */ 317 #endif /* #else #ifdef CONFIG_NO_HZ */
318 318
319 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 319 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
320 320
321 #ifdef CONFIG_PROVE_RCU 321 #ifdef CONFIG_PROVE_RCU
322 #define RCU_STALL_DELAY_DELTA (5 * HZ) 322 #define RCU_STALL_DELAY_DELTA (5 * HZ)
323 #else 323 #else
324 #define RCU_STALL_DELAY_DELTA 0 324 #define RCU_STALL_DELAY_DELTA 0
325 #endif 325 #endif
326 326
327 #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ 327 #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
328 RCU_STALL_DELAY_DELTA) 328 RCU_STALL_DELAY_DELTA)
329 /* for rsp->jiffies_stall */ 329 /* for rsp->jiffies_stall */
330 #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) 330 #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
331 /* for rsp->jiffies_stall */ 331 /* for rsp->jiffies_stall */
332 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 332 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
333 /* to take at least one */ 333 /* to take at least one */
334 /* scheduling clock irq */ 334 /* scheduling clock irq */
335 /* before ratting on them. */ 335 /* before ratting on them. */
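
As a worked example of these timeouts (the configuration values here are illustrative, not mandated): with CONFIG_RCU_CPU_STALL_TIMEOUT=60, HZ=1000, and CONFIG_PROVE_RCU disabled, RCU_SECONDS_TILL_STALL_CHECK works out to 60 * 1000 + 0 = 60000 jiffies (60 seconds), and RCU_SECONDS_TILL_STALL_RECHECK to 3 * 60000 + 30 = 180030 jiffies (roughly three minutes). Enabling CONFIG_PROVE_RCU adds the 5 * HZ slack to the first check.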
336 336
337 337
338 /* 338 /*
339 * RCU global state, including node hierarchy. This hierarchy is 339 * RCU global state, including node hierarchy. This hierarchy is
340 * represented in "heap" form in a dense array. The root (first level) 340 * represented in "heap" form in a dense array. The root (first level)
341 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second 341 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
342 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), 342 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
343 * and the third level in ->node[m+1] and following (->node[m+1] referenced 343 * and the third level in ->node[m+1] and following (->node[m+1] referenced
344 * by ->level[2]). The number of levels is determined by the number of 344 * by ->level[2]). The number of levels is determined by the number of
345 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" 345 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
346 * consisting of a single rcu_node. 346 * consisting of a single rcu_node.
347 */ 347 */
348 struct rcu_state { 348 struct rcu_state {
349 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ 349 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
350 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ 350 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
351 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 351 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
352 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ 352 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
353 struct rcu_data __percpu *rda; /* Pointer to per-CPU rcu_data. */ 353 struct rcu_data __percpu *rda; /* Pointer to per-CPU rcu_data. */
354 354
355 /* The following fields are guarded by the root rcu_node's lock. */ 355 /* The following fields are guarded by the root rcu_node's lock. */
356 356
357 u8 signaled ____cacheline_internodealigned_in_smp; 357 u8 signaled ____cacheline_internodealigned_in_smp;
358 /* Force QS state. */ 358 /* Force QS state. */
359 u8 fqs_active; /* force_quiescent_state() */ 359 u8 fqs_active; /* force_quiescent_state() */
360 /* is running. */ 360 /* is running. */
361 u8 fqs_need_gp; /* A CPU was prevented from */ 361 u8 fqs_need_gp; /* A CPU was prevented from */
362 /* starting a new grace */ 362 /* starting a new grace */
363 /* period because */ 363 /* period because */
364 /* force_quiescent_state() */ 364 /* force_quiescent_state() */
365 /* was running. */ 365 /* was running. */
366 unsigned long gpnum; /* Current gp number. */ 366 unsigned long gpnum; /* Current gp number. */
367 unsigned long completed; /* # of last completed gp. */ 367 unsigned long completed; /* # of last completed gp. */
368 368
369 /* End of fields guarded by root rcu_node's lock. */ 369 /* End of fields guarded by root rcu_node's lock. */
370 370
371 raw_spinlock_t onofflock; /* exclude on/offline and */ 371 raw_spinlock_t onofflock; /* exclude on/offline and */
372 /* starting new GP. */ 372 /* starting new GP. */
373 raw_spinlock_t fqslock; /* Only one task forcing */ 373 raw_spinlock_t fqslock; /* Only one task forcing */
374 /* quiescent states. */ 374 /* quiescent states. */
375 unsigned long jiffies_force_qs; /* Time at which to invoke */ 375 unsigned long jiffies_force_qs; /* Time at which to invoke */
376 /* force_quiescent_state(). */ 376 /* force_quiescent_state(). */
377 unsigned long n_force_qs; /* Number of calls to */ 377 unsigned long n_force_qs; /* Number of calls to */
378 /* force_quiescent_state(). */ 378 /* force_quiescent_state(). */
379 unsigned long n_force_qs_lh; /* ~Number of calls leaving */ 379 unsigned long n_force_qs_lh; /* ~Number of calls leaving */
380 /* due to lock unavailable. */ 380 /* due to lock unavailable. */
381 unsigned long n_force_qs_ngp; /* Number of calls leaving */ 381 unsigned long n_force_qs_ngp; /* Number of calls leaving */
382 /* due to no GP active. */ 382 /* due to no GP active. */
383 unsigned long gp_start; /* Time at which GP started, */ 383 unsigned long gp_start; /* Time at which GP started, */
384 /* but in jiffies. */ 384 /* but in jiffies. */
385 unsigned long jiffies_stall; /* Time at which to check */ 385 unsigned long jiffies_stall; /* Time at which to check */
386 /* for CPU stalls. */ 386 /* for CPU stalls. */
387 unsigned long gp_max; /* Maximum GP duration in */ 387 unsigned long gp_max; /* Maximum GP duration in */
388 /* jiffies. */ 388 /* jiffies. */
389 char *name; /* Name of structure. */ 389 char *name; /* Name of structure. */
390 }; 390 };
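
To make the "heap form" layout described in the comment above concrete, here is a stand-alone sketch that derives ->level[]-style pointers into a dense node array from a per-level node count. The fanout, CPU count, and ex_* names are arbitrary example values, not the kernel's configuration.

#include <stdio.h>

#define EX_FANOUT	16
#define EX_NR_CPUS	64
#define EX_NUM_LVLS	2
/* One root plus enough leaves to cover EX_NR_CPUS at EX_FANOUT per leaf. */
#define EX_NUM_NODES	(1 + (EX_NR_CPUS + EX_FANOUT - 1) / EX_FANOUT)

struct ex_node { int dummy; };

int main(void)
{
	static struct ex_node node[EX_NUM_NODES];
	struct ex_node *level[EX_NUM_LVLS];
	int levelcnt[EX_NUM_LVLS] = { 1, EX_NUM_NODES - 1 };
	int i, off = 0;

	/* Each level starts immediately after the levels above it. */
	for (i = 0; i < EX_NUM_LVLS; i++) {
		level[i] = &node[off];
		off += levelcnt[i];
	}
	for (i = 0; i < EX_NUM_LVLS; i++)
		printf("level %d: %d node(s) starting at node[%d]\n",
		       i, levelcnt[i], (int)(level[i] - node));
	return 0;
}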
391 391
392 /* Return values for rcu_preempt_offline_tasks(). */ 392 /* Return values for rcu_preempt_offline_tasks(). */
393 393
394 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ 394 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
395 /* GP were moved to root. */ 395 /* GP were moved to root. */
396 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ 396 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
397 /* GP were moved to root. */ 397 /* GP were moved to root. */
398 398
399 /* 399 /*
400 * RCU implementation internal declarations: 400 * RCU implementation internal declarations:
401 */ 401 */
402 extern struct rcu_state rcu_sched_state; 402 extern struct rcu_state rcu_sched_state;
403 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); 403 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
404 404
405 extern struct rcu_state rcu_bh_state; 405 extern struct rcu_state rcu_bh_state;
406 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); 406 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
407 407
408 #ifdef CONFIG_TREE_PREEMPT_RCU 408 #ifdef CONFIG_TREE_PREEMPT_RCU
409 extern struct rcu_state rcu_preempt_state; 409 extern struct rcu_state rcu_preempt_state;
410 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 410 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
411 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 411 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
412 412
413 #ifndef RCU_TREE_NONCORE 413 #ifndef RCU_TREE_NONCORE
414 414
415 /* Forward declarations for rcutree_plugin.h */ 415 /* Forward declarations for rcutree_plugin.h */
416 static void rcu_bootup_announce(void); 416 static void rcu_bootup_announce(void);
417 long rcu_batches_completed(void); 417 long rcu_batches_completed(void);
418 static void rcu_preempt_note_context_switch(int cpu); 418 static void rcu_preempt_note_context_switch(int cpu);
419 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 419 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
420 #ifdef CONFIG_HOTPLUG_CPU 420 #ifdef CONFIG_HOTPLUG_CPU
421 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 421 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
422 unsigned long flags); 422 unsigned long flags);
423 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 423 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
424 static void rcu_print_detail_task_stall(struct rcu_state *rsp); 424 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
425 static void rcu_print_task_stall(struct rcu_node *rnp); 425 static void rcu_print_task_stall(struct rcu_node *rnp);
426 static void rcu_preempt_stall_reset(void); 426 static void rcu_preempt_stall_reset(void);
427 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 427 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
428 #ifdef CONFIG_HOTPLUG_CPU 428 #ifdef CONFIG_HOTPLUG_CPU
429 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 429 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
430 struct rcu_node *rnp, 430 struct rcu_node *rnp,
431 struct rcu_data *rdp); 431 struct rcu_data *rdp);
432 static void rcu_preempt_offline_cpu(int cpu); 432 static void rcu_preempt_offline_cpu(int cpu);
433 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 433 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
434 static void rcu_preempt_check_callbacks(int cpu); 434 static void rcu_preempt_check_callbacks(int cpu);
435 static void rcu_preempt_process_callbacks(void); 435 static void rcu_preempt_process_callbacks(void);
436 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 436 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
437 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 437 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
438 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); 438 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
439 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 439 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
440 static int rcu_preempt_pending(int cpu); 440 static int rcu_preempt_pending(int cpu);
441 static int rcu_preempt_needs_cpu(int cpu); 441 static int rcu_preempt_needs_cpu(int cpu);
442 static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 442 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
443 static void rcu_preempt_send_cbs_to_online(void); 443 static void rcu_preempt_send_cbs_to_online(void);
444 static void __init __rcu_init_preempt(void); 444 static void __init __rcu_init_preempt(void);
445 static void rcu_needs_cpu_flush(void); 445 static void rcu_needs_cpu_flush(void);
446 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp); 446 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp);
447 static void rcu_initiate_boost(struct rcu_node *rnp); 447 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
448 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 448 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
449 cpumask_var_t cm); 449 cpumask_var_t cm);
450 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 450 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
451 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 451 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
452 struct rcu_node *rnp, 452 struct rcu_node *rnp,
453 int rnp_index); 453 int rnp_index);
454 #ifdef CONFIG_HOTPLUG_CPU
455 static void rcu_stop_boost_kthread(struct rcu_node *rnp);
456 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
457 454
458 #endif /* #ifndef RCU_TREE_NONCORE */ 455 #endif /* #ifndef RCU_TREE_NONCORE */
459 456
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/stop_machine.h> 28 #include <linux/stop_machine.h>
29 29
30 /* 30 /*
31 * Check the RCU kernel configuration parameters and print informative 31 * Check the RCU kernel configuration parameters and print informative
32 * messages about anything out of the ordinary. If you like #ifdef, you 32 * messages about anything out of the ordinary. If you like #ifdef, you
33 * will love this function. 33 * will love this function.
34 */ 34 */
35 static void __init rcu_bootup_announce_oddness(void) 35 static void __init rcu_bootup_announce_oddness(void)
36 { 36 {
37 #ifdef CONFIG_RCU_TRACE 37 #ifdef CONFIG_RCU_TRACE
38 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); 38 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
39 #endif 39 #endif
40 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 40 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
41 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 41 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
42 CONFIG_RCU_FANOUT); 42 CONFIG_RCU_FANOUT);
43 #endif 43 #endif
44 #ifdef CONFIG_RCU_FANOUT_EXACT 44 #ifdef CONFIG_RCU_FANOUT_EXACT
45 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); 45 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
46 #endif 46 #endif
47 #ifdef CONFIG_RCU_FAST_NO_HZ 47 #ifdef CONFIG_RCU_FAST_NO_HZ
48 printk(KERN_INFO 48 printk(KERN_INFO
49 "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 49 "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
50 #endif 50 #endif
51 #ifdef CONFIG_PROVE_RCU 51 #ifdef CONFIG_PROVE_RCU
52 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); 52 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
53 #endif 53 #endif
54 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 54 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 55 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
56 #endif 56 #endif
57 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 57 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
58 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 58 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
59 #endif 59 #endif
60 #if NUM_RCU_LVL_4 != 0 60 #if NUM_RCU_LVL_4 != 0
61 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 61 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
62 #endif 62 #endif
63 } 63 }
64 64
65 #ifdef CONFIG_TREE_PREEMPT_RCU 65 #ifdef CONFIG_TREE_PREEMPT_RCU
66 66
67 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 67 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
68 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 68 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69 static struct rcu_state *rcu_state = &rcu_preempt_state; 69 static struct rcu_state *rcu_state = &rcu_preempt_state;
70 70
71 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 71 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
72 72
73 /* 73 /*
74 * Tell them what RCU they are running. 74 * Tell them what RCU they are running.
75 */ 75 */
76 static void __init rcu_bootup_announce(void) 76 static void __init rcu_bootup_announce(void)
77 { 77 {
78 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); 78 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
79 rcu_bootup_announce_oddness(); 79 rcu_bootup_announce_oddness();
80 } 80 }
81 81
82 /* 82 /*
83 * Return the number of RCU-preempt batches processed thus far 83 * Return the number of RCU-preempt batches processed thus far
84 * for debug and statistics. 84 * for debug and statistics.
85 */ 85 */
86 long rcu_batches_completed_preempt(void) 86 long rcu_batches_completed_preempt(void)
87 { 87 {
88 return rcu_preempt_state.completed; 88 return rcu_preempt_state.completed;
89 } 89 }
90 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 90 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
91 91
92 /* 92 /*
93 * Return the number of RCU batches processed thus far for debug & stats. 93 * Return the number of RCU batches processed thus far for debug & stats.
94 */ 94 */
95 long rcu_batches_completed(void) 95 long rcu_batches_completed(void)
96 { 96 {
97 return rcu_batches_completed_preempt(); 97 return rcu_batches_completed_preempt();
98 } 98 }
99 EXPORT_SYMBOL_GPL(rcu_batches_completed); 99 EXPORT_SYMBOL_GPL(rcu_batches_completed);
100 100
101 /* 101 /*
102 * Force a quiescent state for preemptible RCU. 102 * Force a quiescent state for preemptible RCU.
103 */ 103 */
104 void rcu_force_quiescent_state(void) 104 void rcu_force_quiescent_state(void)
105 { 105 {
106 force_quiescent_state(&rcu_preempt_state, 0); 106 force_quiescent_state(&rcu_preempt_state, 0);
107 } 107 }
108 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 108 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
109 109
110 /* 110 /*
111 * Record a preemptible-RCU quiescent state for the specified CPU. Note 111 * Record a preemptible-RCU quiescent state for the specified CPU. Note
112 * that this just means that the task currently running on the CPU is 112 * that this just means that the task currently running on the CPU is
113 * not in a quiescent state. There might be any number of tasks blocked 113 * not in a quiescent state. There might be any number of tasks blocked
114 * while in an RCU read-side critical section. 114 * while in an RCU read-side critical section.
115 * 115 *
116 * Unlike the other rcu_*_qs() functions, callers to this function 116 * Unlike the other rcu_*_qs() functions, callers to this function
117 * must disable irqs in order to protect the assignment to 117 * must disable irqs in order to protect the assignment to
118 * ->rcu_read_unlock_special. 118 * ->rcu_read_unlock_special.
119 */ 119 */
120 static void rcu_preempt_qs(int cpu) 120 static void rcu_preempt_qs(int cpu)
121 { 121 {
122 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 122 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
123 123
124 rdp->passed_quiesc_completed = rdp->gpnum - 1; 124 rdp->passed_quiesc_completed = rdp->gpnum - 1;
125 barrier(); 125 barrier();
126 rdp->passed_quiesc = 1; 126 rdp->passed_quiesc = 1;
127 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 127 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
128 } 128 }
129 129
130 /* 130 /*
131 * We have entered the scheduler, and the current task might soon be 131 * We have entered the scheduler, and the current task might soon be
132 * context-switched away from. If this task is in an RCU read-side 132 * context-switched away from. If this task is in an RCU read-side
133 * critical section, we will no longer be able to rely on the CPU to 133 * critical section, we will no longer be able to rely on the CPU to
134 * record that fact, so we enqueue the task on the blkd_tasks list. 134 * record that fact, so we enqueue the task on the blkd_tasks list.
135 * The task will dequeue itself when it exits the outermost enclosing 135 * The task will dequeue itself when it exits the outermost enclosing
136 * RCU read-side critical section. Therefore, the current grace period 136 * RCU read-side critical section. Therefore, the current grace period
137 * cannot be permitted to complete until the blkd_tasks list entries 137 * cannot be permitted to complete until the blkd_tasks list entries
138 * predating the current grace period drain, in other words, until 138 * predating the current grace period drain, in other words, until
139 * rnp->gp_tasks becomes NULL. 139 * rnp->gp_tasks becomes NULL.
140 * 140 *
141 * Caller must disable preemption. 141 * Caller must disable preemption.
142 */ 142 */
143 static void rcu_preempt_note_context_switch(int cpu) 143 static void rcu_preempt_note_context_switch(int cpu)
144 { 144 {
145 struct task_struct *t = current; 145 struct task_struct *t = current;
146 unsigned long flags; 146 unsigned long flags;
147 struct rcu_data *rdp; 147 struct rcu_data *rdp;
148 struct rcu_node *rnp; 148 struct rcu_node *rnp;
149 149
150 if (t->rcu_read_lock_nesting && 150 if (t->rcu_read_lock_nesting &&
151 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 151 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
152 152
153 /* Possibly blocking in an RCU read-side critical section. */ 153 /* Possibly blocking in an RCU read-side critical section. */
154 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 154 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
155 rnp = rdp->mynode; 155 rnp = rdp->mynode;
156 raw_spin_lock_irqsave(&rnp->lock, flags); 156 raw_spin_lock_irqsave(&rnp->lock, flags);
157 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 157 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
158 t->rcu_blocked_node = rnp; 158 t->rcu_blocked_node = rnp;
159 159
160 /* 160 /*
161 * If this CPU has already checked in, then this task 161 * If this CPU has already checked in, then this task
162 * will hold up the next grace period rather than the 162 * will hold up the next grace period rather than the
163 * current grace period. Queue the task accordingly. 163 * current grace period. Queue the task accordingly.
164 * If the task is queued for the current grace period 164 * If the task is queued for the current grace period
165 * (i.e., this CPU has not yet passed through a quiescent 165 * (i.e., this CPU has not yet passed through a quiescent
166 * state for the current grace period), then as long 166 * state for the current grace period), then as long
167 * as that task remains queued, the current grace period 167 * as that task remains queued, the current grace period
168 * cannot end. Note that there is some uncertainty as 168 * cannot end. Note that there is some uncertainty as
169 * to exactly when the current grace period started. 169 * to exactly when the current grace period started.
170 * We take a conservative approach, which can result 170 * We take a conservative approach, which can result
171 * in unnecessarily waiting on tasks that started very 171 * in unnecessarily waiting on tasks that started very
172 * slightly after the current grace period began. C'est 172 * slightly after the current grace period began. C'est
173 * la vie!!! 173 * la vie!!!
174 * 174 *
175 * But first, note that the current CPU must still be 175 * But first, note that the current CPU must still be
176 * on line! 176 * on line!
177 */ 177 */
178 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 178 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
179 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 179 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 180 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 181 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
182 rnp->gp_tasks = &t->rcu_node_entry; 182 rnp->gp_tasks = &t->rcu_node_entry;
183 #ifdef CONFIG_RCU_BOOST 183 #ifdef CONFIG_RCU_BOOST
184 if (rnp->boost_tasks != NULL) 184 if (rnp->boost_tasks != NULL)
185 rnp->boost_tasks = rnp->gp_tasks; 185 rnp->boost_tasks = rnp->gp_tasks;
186 #endif /* #ifdef CONFIG_RCU_BOOST */ 186 #endif /* #ifdef CONFIG_RCU_BOOST */
187 } else { 187 } else {
188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 188 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
189 if (rnp->qsmask & rdp->grpmask) 189 if (rnp->qsmask & rdp->grpmask)
190 rnp->gp_tasks = &t->rcu_node_entry; 190 rnp->gp_tasks = &t->rcu_node_entry;
191 } 191 }
192 raw_spin_unlock_irqrestore(&rnp->lock, flags); 192 raw_spin_unlock_irqrestore(&rnp->lock, flags);
193 } 193 }
194 194
195 /* 195 /*
196 * Either we were not in an RCU read-side critical section to 196 * Either we were not in an RCU read-side critical section to
197 * begin with, or we have now recorded that critical section 197 * begin with, or we have now recorded that critical section
198 * globally. Either way, we can now note a quiescent state 198 * globally. Either way, we can now note a quiescent state
199 * for this CPU. Again, if we were in an RCU read-side critical 199 * for this CPU. Again, if we were in an RCU read-side critical
200 * section, and if that critical section was blocking the current 200 * section, and if that critical section was blocking the current
201 * grace period, then the fact that the task has been enqueued 201 * grace period, then the fact that the task has been enqueued
202 * means that we continue to block the current grace period. 202 * means that we continue to block the current grace period.
203 */ 203 */
204 local_irq_save(flags); 204 local_irq_save(flags);
205 rcu_preempt_qs(cpu); 205 rcu_preempt_qs(cpu);
206 local_irq_restore(flags); 206 local_irq_restore(flags);
207 } 207 }
208 208
209 /* 209 /*
210 * Tree-preemptible RCU implementation for rcu_read_lock(). 210 * Tree-preemptible RCU implementation for rcu_read_lock().
211 * Just increment ->rcu_read_lock_nesting, shared state will be updated 211 * Just increment ->rcu_read_lock_nesting, shared state will be updated
212 * if we block. 212 * if we block.
213 */ 213 */
214 void __rcu_read_lock(void) 214 void __rcu_read_lock(void)
215 { 215 {
216 current->rcu_read_lock_nesting++; 216 current->rcu_read_lock_nesting++;
217 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ 217 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
218 } 218 }
219 EXPORT_SYMBOL_GPL(__rcu_read_lock); 219 EXPORT_SYMBOL_GPL(__rcu_read_lock);
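
For context, the reader-side pattern that ends up in __rcu_read_lock() and, further below, __rcu_read_unlock() under TREE_PREEMPT_RCU is the usual rcu_read_lock()/rcu_dereference()/rcu_read_unlock() sequence. A small illustrative fragment follows; my_data, gbl_ptr, and read_a() are made-up names, and the fragment is meant to live in kernel context (hence the rcupdate.h include), not to be a complete module.

#include <linux/rcupdate.h>

struct my_data { int a; };
static struct my_data __rcu *gbl_ptr;

static int read_a(void)
{
	struct my_data *p;
	int ret = 0;

	rcu_read_lock();		/* increments ->rcu_read_lock_nesting */
	p = rcu_dereference(gbl_ptr);	/* fetch the RCU-protected pointer */
	if (p)
		ret = p->a;
	rcu_read_unlock();		/* may end up in rcu_read_unlock_special() */
	return ret;
}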
220 220
221 /* 221 /*
222 * Check for preempted RCU readers blocking the current grace period 222 * Check for preempted RCU readers blocking the current grace period
223 * for the specified rcu_node structure. If the caller needs a reliable 223 * for the specified rcu_node structure. If the caller needs a reliable
224 * answer, it must hold the rcu_node's ->lock. 224 * answer, it must hold the rcu_node's ->lock.
225 */ 225 */
226 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 226 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
227 { 227 {
228 return rnp->gp_tasks != NULL; 228 return rnp->gp_tasks != NULL;
229 } 229 }
230 230
231 /* 231 /*
232 * Record a quiescent state for all tasks that were previously queued 232 * Record a quiescent state for all tasks that were previously queued
233 * on the specified rcu_node structure and that were blocking the current 233 * on the specified rcu_node structure and that were blocking the current
234 * RCU grace period. The caller must hold the specified rnp->lock with 234 * RCU grace period. The caller must hold the specified rnp->lock with
235 * irqs disabled, and this lock is released upon return, but irqs remain 235 * irqs disabled, and this lock is released upon return, but irqs remain
236 * disabled. 236 * disabled.
237 */ 237 */
238 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 238 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
239 __releases(rnp->lock) 239 __releases(rnp->lock)
240 { 240 {
241 unsigned long mask; 241 unsigned long mask;
242 struct rcu_node *rnp_p; 242 struct rcu_node *rnp_p;
243 243
244 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 244 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
245 raw_spin_unlock_irqrestore(&rnp->lock, flags); 245 raw_spin_unlock_irqrestore(&rnp->lock, flags);
246 return; /* Still need more quiescent states! */ 246 return; /* Still need more quiescent states! */
247 } 247 }
248 248
249 rnp_p = rnp->parent; 249 rnp_p = rnp->parent;
250 if (rnp_p == NULL) { 250 if (rnp_p == NULL) {
251 /* 251 /*
252 * Either there is only one rcu_node in the tree, 252 * Either there is only one rcu_node in the tree,
253 * or tasks were kicked up to root rcu_node due to 253 * or tasks were kicked up to root rcu_node due to
254 * CPUs going offline. 254 * CPUs going offline.
255 */ 255 */
256 rcu_report_qs_rsp(&rcu_preempt_state, flags); 256 rcu_report_qs_rsp(&rcu_preempt_state, flags);
257 return; 257 return;
258 } 258 }
259 259
260 /* Report up the rest of the hierarchy. */ 260 /* Report up the rest of the hierarchy. */
261 mask = rnp->grpmask; 261 mask = rnp->grpmask;
262 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 262 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
263 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 263 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
264 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 264 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
265 } 265 }
266 266
267 /* 267 /*
268 * Advance a ->blkd_tasks-list pointer to the next entry, instead 268 * Advance a ->blkd_tasks-list pointer to the next entry, instead
269 * returning NULL if at the end of the list. 269 * returning NULL if at the end of the list.
270 */ 270 */
271 static struct list_head *rcu_next_node_entry(struct task_struct *t, 271 static struct list_head *rcu_next_node_entry(struct task_struct *t,
272 struct rcu_node *rnp) 272 struct rcu_node *rnp)
273 { 273 {
274 struct list_head *np; 274 struct list_head *np;
275 275
276 np = t->rcu_node_entry.next; 276 np = t->rcu_node_entry.next;
277 if (np == &rnp->blkd_tasks) 277 if (np == &rnp->blkd_tasks)
278 np = NULL; 278 np = NULL;
279 return np; 279 return np;
280 } 280 }
281 281
282 /* 282 /*
283 * Handle special cases during rcu_read_unlock(), such as needing to 283 * Handle special cases during rcu_read_unlock(), such as needing to
284 * notify RCU core processing or task having blocked during the RCU 284 * notify RCU core processing or task having blocked during the RCU
285 * read-side critical section. 285 * read-side critical section.
286 */ 286 */
287 static void rcu_read_unlock_special(struct task_struct *t) 287 static void rcu_read_unlock_special(struct task_struct *t)
288 { 288 {
289 int empty; 289 int empty;
290 int empty_exp; 290 int empty_exp;
291 unsigned long flags; 291 unsigned long flags;
292 struct list_head *np; 292 struct list_head *np;
293 struct rcu_node *rnp; 293 struct rcu_node *rnp;
294 int special; 294 int special;
295 295
296 /* NMI handlers cannot block and cannot safely manipulate state. */ 296 /* NMI handlers cannot block and cannot safely manipulate state. */
297 if (in_nmi()) 297 if (in_nmi())
298 return; 298 return;
299 299
300 local_irq_save(flags); 300 local_irq_save(flags);
301 301
302 /* 302 /*
303 * If RCU core is waiting for this CPU to exit critical section, 303 * If RCU core is waiting for this CPU to exit critical section,
304 * let it know that we have done so. 304 * let it know that we have done so.
305 */ 305 */
306 special = t->rcu_read_unlock_special; 306 special = t->rcu_read_unlock_special;
307 if (special & RCU_READ_UNLOCK_NEED_QS) { 307 if (special & RCU_READ_UNLOCK_NEED_QS) {
308 rcu_preempt_qs(smp_processor_id()); 308 rcu_preempt_qs(smp_processor_id());
309 } 309 }
310 310
311 /* Hardware IRQ handlers cannot block. */ 311 /* Hardware IRQ handlers cannot block. */
312 if (in_irq()) { 312 if (in_irq()) {
313 local_irq_restore(flags); 313 local_irq_restore(flags);
314 return; 314 return;
315 } 315 }
316 316
317 /* Clean up if blocked during RCU read-side critical section. */ 317 /* Clean up if blocked during RCU read-side critical section. */
318 if (special & RCU_READ_UNLOCK_BLOCKED) { 318 if (special & RCU_READ_UNLOCK_BLOCKED) {
319 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 319 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
320 320
321 /* 321 /*
322 * Remove this task from the list it blocked on. The 322 * Remove this task from the list it blocked on. The
323 * task can migrate while we acquire the lock, but at 323 * task can migrate while we acquire the lock, but at
324 * most one time. So at most two passes through loop. 324 * most one time. So at most two passes through loop.
325 */ 325 */
326 for (;;) { 326 for (;;) {
327 rnp = t->rcu_blocked_node; 327 rnp = t->rcu_blocked_node;
328 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 328 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
329 if (rnp == t->rcu_blocked_node) 329 if (rnp == t->rcu_blocked_node)
330 break; 330 break;
331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 331 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
332 } 332 }
333 empty = !rcu_preempt_blocked_readers_cgp(rnp); 333 empty = !rcu_preempt_blocked_readers_cgp(rnp);
334 empty_exp = !rcu_preempted_readers_exp(rnp); 334 empty_exp = !rcu_preempted_readers_exp(rnp);
335 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 335 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
336 np = rcu_next_node_entry(t, rnp); 336 np = rcu_next_node_entry(t, rnp);
337 list_del_init(&t->rcu_node_entry); 337 list_del_init(&t->rcu_node_entry);
338 if (&t->rcu_node_entry == rnp->gp_tasks) 338 if (&t->rcu_node_entry == rnp->gp_tasks)
339 rnp->gp_tasks = np; 339 rnp->gp_tasks = np;
340 if (&t->rcu_node_entry == rnp->exp_tasks) 340 if (&t->rcu_node_entry == rnp->exp_tasks)
341 rnp->exp_tasks = np; 341 rnp->exp_tasks = np;
342 #ifdef CONFIG_RCU_BOOST 342 #ifdef CONFIG_RCU_BOOST
343 if (&t->rcu_node_entry == rnp->boost_tasks) 343 if (&t->rcu_node_entry == rnp->boost_tasks)
344 rnp->boost_tasks = np; 344 rnp->boost_tasks = np;
345 #endif /* #ifdef CONFIG_RCU_BOOST */ 345 #endif /* #ifdef CONFIG_RCU_BOOST */
346 t->rcu_blocked_node = NULL; 346 t->rcu_blocked_node = NULL;
347 347
348 /* 348 /*
349 * If this was the last task on the current list, and if 349 * If this was the last task on the current list, and if
350 * we aren't waiting on any CPUs, report the quiescent state. 350 * we aren't waiting on any CPUs, report the quiescent state.
351 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. 351 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
352 */ 352 */
353 if (empty) 353 if (empty)
354 raw_spin_unlock_irqrestore(&rnp->lock, flags); 354 raw_spin_unlock_irqrestore(&rnp->lock, flags);
355 else 355 else
356 rcu_report_unblock_qs_rnp(rnp, flags); 356 rcu_report_unblock_qs_rnp(rnp, flags);
357 357
358 #ifdef CONFIG_RCU_BOOST 358 #ifdef CONFIG_RCU_BOOST
359 /* Unboost if we were boosted. */ 359 /* Unboost if we were boosted. */
360 if (special & RCU_READ_UNLOCK_BOOSTED) { 360 if (special & RCU_READ_UNLOCK_BOOSTED) {
361 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; 361 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
362 rt_mutex_unlock(t->rcu_boost_mutex); 362 rt_mutex_unlock(t->rcu_boost_mutex);
363 t->rcu_boost_mutex = NULL; 363 t->rcu_boost_mutex = NULL;
364 } 364 }
365 #endif /* #ifdef CONFIG_RCU_BOOST */ 365 #endif /* #ifdef CONFIG_RCU_BOOST */
366 366
367 /* 367 /*
368 * If this was the last task on the expedited lists, 368 * If this was the last task on the expedited lists,
369 * then we need to report up the rcu_node hierarchy. 369 * then we need to report up the rcu_node hierarchy.
370 */ 370 */
371 if (!empty_exp && !rcu_preempted_readers_exp(rnp)) 371 if (!empty_exp && !rcu_preempted_readers_exp(rnp))
372 rcu_report_exp_rnp(&rcu_preempt_state, rnp); 372 rcu_report_exp_rnp(&rcu_preempt_state, rnp);
373 } else { 373 } else {
374 local_irq_restore(flags); 374 local_irq_restore(flags);
375 } 375 }
376 } 376 }
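
The retry loop near the top of the blocked-task cleanup above ("at most two passes through loop") is an instance of a general pattern: lock the object a field currently points to, then verify the field still points there, since it may have been retargeted before the lock was acquired. A stand-alone user-space sketch of that pattern follows, with invented names and a pthread mutex standing in for the rcu_node lock; unlike the kernel case, nothing here bounds the number of retries.

#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_mutex_t lock;
};

struct task {
	struct node *blocked_node;	/* may be retargeted by another path */
};

/* Lock whatever node t->blocked_node points at, rechecking once the
 * lock is held in case the pointer moved while we were acquiring it. */
static struct node *lock_blocked_node(struct task *t)
{
	struct node *n;

	for (;;) {
		n = t->blocked_node;
		pthread_mutex_lock(&n->lock);
		if (n == t->blocked_node)
			return n;	/* still the right node: done */
		pthread_mutex_unlock(&n->lock);
		/* Pointer moved under us; take another pass. */
	}
}

int main(void)
{
	static struct node n1 = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct task t = { .blocked_node = &n1 };
	struct node *n = lock_blocked_node(&t);

	printf("locked node %p\n", (void *)n);
	pthread_mutex_unlock(&n->lock);
	return 0;
}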
377 377
378 /* 378 /*
379 * Tree-preemptible RCU implementation for rcu_read_unlock(). 379 * Tree-preemptible RCU implementation for rcu_read_unlock().
380 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 380 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
381 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 381 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
382 * invoke rcu_read_unlock_special() to clean up after a context switch 382 * invoke rcu_read_unlock_special() to clean up after a context switch
383 * in an RCU read-side critical section and other special cases. 383 * in an RCU read-side critical section and other special cases.
384 */ 384 */
385 void __rcu_read_unlock(void) 385 void __rcu_read_unlock(void)
386 { 386 {
387 struct task_struct *t = current; 387 struct task_struct *t = current;
388 388
389 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ 389 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
390 --t->rcu_read_lock_nesting; 390 --t->rcu_read_lock_nesting;
391 barrier(); /* decrement before load of ->rcu_read_unlock_special */ 391 barrier(); /* decrement before load of ->rcu_read_unlock_special */
392 if (t->rcu_read_lock_nesting == 0 && 392 if (t->rcu_read_lock_nesting == 0 &&
393 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 393 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
394 rcu_read_unlock_special(t); 394 rcu_read_unlock_special(t);
395 #ifdef CONFIG_PROVE_LOCKING 395 #ifdef CONFIG_PROVE_LOCKING
396 WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); 396 WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
397 #endif /* #ifdef CONFIG_PROVE_LOCKING */ 397 #endif /* #ifdef CONFIG_PROVE_LOCKING */
398 } 398 }
399 EXPORT_SYMBOL_GPL(__rcu_read_unlock); 399 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
400 400
401 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 401 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
402 402
403 /* 403 /*
404 * Dump detailed information for all tasks blocking the current RCU 404 * Dump detailed information for all tasks blocking the current RCU
405 * grace period on the specified rcu_node structure. 405 * grace period on the specified rcu_node structure.
406 */ 406 */
407 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 407 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
408 { 408 {
409 unsigned long flags; 409 unsigned long flags;
410 struct task_struct *t; 410 struct task_struct *t;
411 411
412 if (!rcu_preempt_blocked_readers_cgp(rnp)) 412 if (!rcu_preempt_blocked_readers_cgp(rnp))
413 return; 413 return;
414 raw_spin_lock_irqsave(&rnp->lock, flags); 414 raw_spin_lock_irqsave(&rnp->lock, flags);
415 t = list_entry(rnp->gp_tasks, 415 t = list_entry(rnp->gp_tasks,
416 struct task_struct, rcu_node_entry); 416 struct task_struct, rcu_node_entry);
417 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 417 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
418 sched_show_task(t); 418 sched_show_task(t);
419 raw_spin_unlock_irqrestore(&rnp->lock, flags); 419 raw_spin_unlock_irqrestore(&rnp->lock, flags);
420 } 420 }
421 421
422 /* 422 /*
423 * Dump detailed information for all tasks blocking the current RCU 423 * Dump detailed information for all tasks blocking the current RCU
424 * grace period. 424 * grace period.
425 */ 425 */
426 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 426 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
427 { 427 {
428 struct rcu_node *rnp = rcu_get_root(rsp); 428 struct rcu_node *rnp = rcu_get_root(rsp);
429 429
430 rcu_print_detail_task_stall_rnp(rnp); 430 rcu_print_detail_task_stall_rnp(rnp);
431 rcu_for_each_leaf_node(rsp, rnp) 431 rcu_for_each_leaf_node(rsp, rnp)
432 rcu_print_detail_task_stall_rnp(rnp); 432 rcu_print_detail_task_stall_rnp(rnp);
433 } 433 }
434 434
435 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 435 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
436 436
437 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 437 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
438 { 438 {
439 } 439 }
440 440
441 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 441 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
442 442
443 /* 443 /*
444 * Scan the current list of tasks blocked within RCU read-side critical 444 * Scan the current list of tasks blocked within RCU read-side critical
445 * sections, printing out the tid of each. 445 * sections, printing out the tid of each.
446 */ 446 */
447 static void rcu_print_task_stall(struct rcu_node *rnp) 447 static void rcu_print_task_stall(struct rcu_node *rnp)
448 { 448 {
449 struct task_struct *t; 449 struct task_struct *t;
450 450
451 if (!rcu_preempt_blocked_readers_cgp(rnp)) 451 if (!rcu_preempt_blocked_readers_cgp(rnp))
452 return; 452 return;
453 t = list_entry(rnp->gp_tasks, 453 t = list_entry(rnp->gp_tasks,
454 struct task_struct, rcu_node_entry); 454 struct task_struct, rcu_node_entry);
455 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 455 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
456 printk(" P%d", t->pid); 456 printk(" P%d", t->pid);
457 } 457 }
458 458
459 /* 459 /*
460 * Suppress preemptible RCU's CPU stall warnings by pushing the 460 * Suppress preemptible RCU's CPU stall warnings by pushing the
461 * time of the next stall-warning message comfortably far into the 461 * time of the next stall-warning message comfortably far into the
462 * future. 462 * future.
463 */ 463 */
464 static void rcu_preempt_stall_reset(void) 464 static void rcu_preempt_stall_reset(void)
465 { 465 {
466 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; 466 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
467 } 467 }
468 468
469 /* 469 /*
470 * Check that the list of blocked tasks for the newly completed grace 470 * Check that the list of blocked tasks for the newly completed grace
471 * period is in fact empty. It is a serious bug to complete a grace 471 * period is in fact empty. It is a serious bug to complete a grace
472 * period that still has RCU readers blocked! This function must be 472 * period that still has RCU readers blocked! This function must be
473 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 473 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
474 * must be held by the caller. 474 * must be held by the caller.
475 * 475 *
476 * Also, if there are blocked tasks on the list, they automatically 476 * Also, if there are blocked tasks on the list, they automatically
477 * block the newly created grace period, so set up ->gp_tasks accordingly. 477 * block the newly created grace period, so set up ->gp_tasks accordingly.
478 */ 478 */
479 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 479 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
480 { 480 {
481 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 481 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
482 if (!list_empty(&rnp->blkd_tasks)) 482 if (!list_empty(&rnp->blkd_tasks))
483 rnp->gp_tasks = rnp->blkd_tasks.next; 483 rnp->gp_tasks = rnp->blkd_tasks.next;
484 WARN_ON_ONCE(rnp->qsmask); 484 WARN_ON_ONCE(rnp->qsmask);
485 } 485 }
486 486
487 #ifdef CONFIG_HOTPLUG_CPU 487 #ifdef CONFIG_HOTPLUG_CPU
488 488
489 /* 489 /*
490 * Handle tasklist migration for case in which all CPUs covered by the 490 * Handle tasklist migration for case in which all CPUs covered by the
491 * specified rcu_node have gone offline. Move them up to the root 491 * specified rcu_node have gone offline. Move them up to the root
492 * rcu_node. The reason for not just moving them to the immediate 492 * rcu_node. The reason for not just moving them to the immediate
493 * parent is to remove the need for rcu_read_unlock_special() to 493 * parent is to remove the need for rcu_read_unlock_special() to
494 * make more than two attempts to acquire the target rcu_node's lock. 494 * make more than two attempts to acquire the target rcu_node's lock.
495 * Returns non-zero if there were tasks blocking the current RCU grace 495 * Returns non-zero if there were tasks blocking the current RCU grace
496 * period. 496 * period.
497 * 497 *
498 * The return value indicates whether those tasks were blocking the normal 498 * The return value indicates whether those tasks were blocking the normal
499 * grace period, the expedited grace period, or both, on this rcu_node. 499 * grace period, the expedited grace period, or both, on this rcu_node.
500 * 500 *
501 * The caller must hold rnp->lock with irqs disabled. 501 * The caller must hold rnp->lock with irqs disabled.
502 */ 502 */
503 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 503 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
504 struct rcu_node *rnp, 504 struct rcu_node *rnp,
505 struct rcu_data *rdp) 505 struct rcu_data *rdp)
506 { 506 {
507 struct list_head *lp; 507 struct list_head *lp;
508 struct list_head *lp_root; 508 struct list_head *lp_root;
509 int retval = 0; 509 int retval = 0;
510 struct rcu_node *rnp_root = rcu_get_root(rsp); 510 struct rcu_node *rnp_root = rcu_get_root(rsp);
511 struct task_struct *t; 511 struct task_struct *t;
512 512
513 if (rnp == rnp_root) { 513 if (rnp == rnp_root) {
514 WARN_ONCE(1, "Last CPU thought to be offlined?"); 514 WARN_ONCE(1, "Last CPU thought to be offlined?");
515 return 0; /* Shouldn't happen: at least one CPU online. */ 515 return 0; /* Shouldn't happen: at least one CPU online. */
516 } 516 }
517 517
518 /* If we are on an internal node, complain bitterly. */ 518 /* If we are on an internal node, complain bitterly. */
519 WARN_ON_ONCE(rnp != rdp->mynode); 519 WARN_ON_ONCE(rnp != rdp->mynode);
520 520
521 /* 521 /*
522 * Move tasks up to root rcu_node. Don't try to get fancy for 522 * Move tasks up to root rcu_node. Don't try to get fancy for
523 * this corner-case operation -- just put this node's tasks 523 * this corner-case operation -- just put this node's tasks
524 * at the head of the root node's list, and update the root node's 524 * at the head of the root node's list, and update the root node's
525 * ->gp_tasks and ->exp_tasks pointers to those of this node's, 525 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
526 * if non-NULL. This might result in waiting for more tasks than 526 * if non-NULL. This might result in waiting for more tasks than
527 * absolutely necessary, but this is a good performance/complexity 527 * absolutely necessary, but this is a good performance/complexity
528 * tradeoff. 528 * tradeoff.
529 */ 529 */
530 if (rcu_preempt_blocked_readers_cgp(rnp)) 530 if (rcu_preempt_blocked_readers_cgp(rnp))
531 retval |= RCU_OFL_TASKS_NORM_GP; 531 retval |= RCU_OFL_TASKS_NORM_GP;
532 if (rcu_preempted_readers_exp(rnp)) 532 if (rcu_preempted_readers_exp(rnp))
533 retval |= RCU_OFL_TASKS_EXP_GP; 533 retval |= RCU_OFL_TASKS_EXP_GP;
534 lp = &rnp->blkd_tasks; 534 lp = &rnp->blkd_tasks;
535 lp_root = &rnp_root->blkd_tasks; 535 lp_root = &rnp_root->blkd_tasks;
536 while (!list_empty(lp)) { 536 while (!list_empty(lp)) {
537 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 537 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
538 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 538 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
539 list_del(&t->rcu_node_entry); 539 list_del(&t->rcu_node_entry);
540 t->rcu_blocked_node = rnp_root; 540 t->rcu_blocked_node = rnp_root;
541 list_add(&t->rcu_node_entry, lp_root); 541 list_add(&t->rcu_node_entry, lp_root);
542 if (&t->rcu_node_entry == rnp->gp_tasks) 542 if (&t->rcu_node_entry == rnp->gp_tasks)
543 rnp_root->gp_tasks = rnp->gp_tasks; 543 rnp_root->gp_tasks = rnp->gp_tasks;
544 if (&t->rcu_node_entry == rnp->exp_tasks) 544 if (&t->rcu_node_entry == rnp->exp_tasks)
545 rnp_root->exp_tasks = rnp->exp_tasks; 545 rnp_root->exp_tasks = rnp->exp_tasks;
546 #ifdef CONFIG_RCU_BOOST 546 #ifdef CONFIG_RCU_BOOST
547 if (&t->rcu_node_entry == rnp->boost_tasks) 547 if (&t->rcu_node_entry == rnp->boost_tasks)
548 rnp_root->boost_tasks = rnp->boost_tasks; 548 rnp_root->boost_tasks = rnp->boost_tasks;
549 #endif /* #ifdef CONFIG_RCU_BOOST */ 549 #endif /* #ifdef CONFIG_RCU_BOOST */
550 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 550 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
551 } 551 }
552 552
553 #ifdef CONFIG_RCU_BOOST 553 #ifdef CONFIG_RCU_BOOST
554 /* In case root is being boosted and leaf is not. */ 554 /* In case root is being boosted and leaf is not. */
555 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 555 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
556 if (rnp_root->boost_tasks != NULL && 556 if (rnp_root->boost_tasks != NULL &&
557 rnp_root->boost_tasks != rnp_root->gp_tasks) 557 rnp_root->boost_tasks != rnp_root->gp_tasks)
558 rnp_root->boost_tasks = rnp_root->gp_tasks; 558 rnp_root->boost_tasks = rnp_root->gp_tasks;
559 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 559 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
560 #endif /* #ifdef CONFIG_RCU_BOOST */ 560 #endif /* #ifdef CONFIG_RCU_BOOST */
561 561
562 rnp->gp_tasks = NULL; 562 rnp->gp_tasks = NULL;
563 rnp->exp_tasks = NULL; 563 rnp->exp_tasks = NULL;
564 return retval; 564 return retval;
565 } 565 }
566 566
567 /* 567 /*
568 * Do CPU-offline processing for preemptible RCU. 568 * Do CPU-offline processing for preemptible RCU.
569 */ 569 */
570 static void rcu_preempt_offline_cpu(int cpu) 570 static void rcu_preempt_offline_cpu(int cpu)
571 { 571 {
572 __rcu_offline_cpu(cpu, &rcu_preempt_state); 572 __rcu_offline_cpu(cpu, &rcu_preempt_state);
573 } 573 }
574 574
575 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 575 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
576 576
577 /* 577 /*
578 * Check for a quiescent state from the current CPU. When a task blocks, 578 * Check for a quiescent state from the current CPU. When a task blocks,
579 * the task is recorded in the corresponding CPU's rcu_node structure, 579 * the task is recorded in the corresponding CPU's rcu_node structure,
580 * which is checked elsewhere. 580 * which is checked elsewhere.
581 * 581 *
582 * Caller must disable hard irqs. 582 * Caller must disable hard irqs.
583 */ 583 */
584 static void rcu_preempt_check_callbacks(int cpu) 584 static void rcu_preempt_check_callbacks(int cpu)
585 { 585 {
586 struct task_struct *t = current; 586 struct task_struct *t = current;
587 587
588 if (t->rcu_read_lock_nesting == 0) { 588 if (t->rcu_read_lock_nesting == 0) {
589 rcu_preempt_qs(cpu); 589 rcu_preempt_qs(cpu);
590 return; 590 return;
591 } 591 }
592 if (per_cpu(rcu_preempt_data, cpu).qs_pending) 592 if (per_cpu(rcu_preempt_data, cpu).qs_pending)
593 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 593 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
594 } 594 }
595 595
596 /* 596 /*
597 * Process callbacks for preemptible RCU. 597 * Process callbacks for preemptible RCU.
598 */ 598 */
599 static void rcu_preempt_process_callbacks(void) 599 static void rcu_preempt_process_callbacks(void)
600 { 600 {
601 __rcu_process_callbacks(&rcu_preempt_state, 601 __rcu_process_callbacks(&rcu_preempt_state,
602 &__get_cpu_var(rcu_preempt_data)); 602 &__get_cpu_var(rcu_preempt_data));
603 } 603 }
604 604
605 /* 605 /*
606 * Queue a preemptible-RCU callback for invocation after a grace period. 606 * Queue a preemptible-RCU callback for invocation after a grace period.
607 */ 607 */
608 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 608 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
609 { 609 {
610 __call_rcu(head, func, &rcu_preempt_state); 610 __call_rcu(head, func, &rcu_preempt_state);
611 } 611 }
612 EXPORT_SYMBOL_GPL(call_rcu); 612 EXPORT_SYMBOL_GPL(call_rcu);
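As a usage illustration (a sketch, not from this patch; struct foo, foo_reclaim() and foo_release() are hypothetical), this is the usual way to pair call_rcu() with container_of() so the callback can free its enclosing structure:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
        int a;
        struct rcu_head rcu;            /* embedded callback header */
};

static void foo_reclaim(struct rcu_head *rcu)
{
        struct foo *fp = container_of(rcu, struct foo, rcu);

        kfree(fp);                      /* runs only after a grace period has elapsed */
}

static void foo_release(struct foo *fp)
{
        /* The caller is assumed to have already unpublished fp
         * (e.g. via list_del_rcu()); pre-existing readers may still hold it. */
        call_rcu(&fp->rcu, foo_reclaim);
}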
613 613
614 /** 614 /**
615 * synchronize_rcu - wait until a grace period has elapsed. 615 * synchronize_rcu - wait until a grace period has elapsed.
616 * 616 *
617 * Control will return to the caller some time after a full grace 617 * Control will return to the caller some time after a full grace
618 * period has elapsed, in other words after all currently executing RCU 618 * period has elapsed, in other words after all currently executing RCU
619 * read-side critical sections have completed. Note, however, that 619 * read-side critical sections have completed. Note, however, that
620 * upon return from synchronize_rcu(), the caller might well be executing 620 * upon return from synchronize_rcu(), the caller might well be executing
621 * concurrently with new RCU read-side critical sections that began while 621 * concurrently with new RCU read-side critical sections that began while
622 * synchronize_rcu() was waiting. RCU read-side critical sections are 622 * synchronize_rcu() was waiting. RCU read-side critical sections are
623 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 623 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
624 */ 624 */
625 void synchronize_rcu(void) 625 void synchronize_rcu(void)
626 { 626 {
627 struct rcu_synchronize rcu; 627 struct rcu_synchronize rcu;
628 628
629 if (!rcu_scheduler_active) 629 if (!rcu_scheduler_active)
630 return; 630 return;
631 631
632 init_rcu_head_on_stack(&rcu.head); 632 init_rcu_head_on_stack(&rcu.head);
633 init_completion(&rcu.completion); 633 init_completion(&rcu.completion);
634 /* Will wake me after RCU finished. */ 634 /* Will wake me after RCU finished. */
635 call_rcu(&rcu.head, wakeme_after_rcu); 635 call_rcu(&rcu.head, wakeme_after_rcu);
636 /* Wait for it. */ 636 /* Wait for it. */
637 wait_for_completion(&rcu.completion); 637 wait_for_completion(&rcu.completion);
638 destroy_rcu_head_on_stack(&rcu.head); 638 destroy_rcu_head_on_stack(&rcu.head);
639 } 639 }
640 EXPORT_SYMBOL_GPL(synchronize_rcu); 640 EXPORT_SYMBOL_GPL(synchronize_rcu);
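A common blocking-updater sketch (assumed example, not part of the patch; the foo names and gbl_foo_lock are invented) that leans on the guarantee documented above:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
        int a;
};

static struct foo *gbl_foo;             /* read under rcu_read_lock() */
static DEFINE_SPINLOCK(gbl_foo_lock);   /* serializes updaters only */

static void update_foo_a(int new_a)
{
        struct foo *new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
        struct foo *old_fp;

        if (!new_fp)
                return;
        new_fp->a = new_a;

        spin_lock(&gbl_foo_lock);
        old_fp = gbl_foo;
        rcu_assign_pointer(gbl_foo, new_fp);    /* publish the new version */
        spin_unlock(&gbl_foo_lock);

        synchronize_rcu();              /* wait out all pre-existing readers */
        kfree(old_fp);                  /* no reader can still reference old_fp */
}

The point of synchronize_rcu() here is only to separate unpublishing old_fp from freeing it; readers that start after the rcu_assign_pointer() already see new_fp and are unaffected.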
641 641
642 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 642 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
643 static long sync_rcu_preempt_exp_count; 643 static long sync_rcu_preempt_exp_count;
644 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 644 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
645 645
646 /* 646 /*
647 * Return non-zero if there are any tasks in RCU read-side critical 647 * Return non-zero if there are any tasks in RCU read-side critical
648 * sections blocking the current preemptible-RCU expedited grace period. 648 * sections blocking the current preemptible-RCU expedited grace period.
649 * If there is no preemptible-RCU expedited grace period currently in 649 * If there is no preemptible-RCU expedited grace period currently in
650 * progress, returns zero unconditionally. 650 * progress, returns zero unconditionally.
651 */ 651 */
652 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 652 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
653 { 653 {
654 return rnp->exp_tasks != NULL; 654 return rnp->exp_tasks != NULL;
655 } 655 }
656 656
657 /* 657 /*
658 * Return non-zero if there is no RCU expedited grace period in progress 658 * Return non-zero if there is no RCU expedited grace period in progress
659 * for the specified rcu_node structure, in other words, if all CPUs and 659 * for the specified rcu_node structure, in other words, if all CPUs and
660 * tasks covered by the specified rcu_node structure have done their bit 660 * tasks covered by the specified rcu_node structure have done their bit
661 * for the current expedited grace period. Works only for preemptible 661 * for the current expedited grace period. Works only for preemptible
662 * RCU -- other RCU implementations use other means. 662 * RCU -- other RCU implementations use other means.
663 * 663 *
664 * Caller must hold sync_rcu_preempt_exp_mutex. 664 * Caller must hold sync_rcu_preempt_exp_mutex.
665 */ 665 */
666 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 666 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
667 { 667 {
668 return !rcu_preempted_readers_exp(rnp) && 668 return !rcu_preempted_readers_exp(rnp) &&
669 ACCESS_ONCE(rnp->expmask) == 0; 669 ACCESS_ONCE(rnp->expmask) == 0;
670 } 670 }
671 671
672 /* 672 /*
673 * Report the exit from RCU read-side critical section for the last task 673 * Report the exit from RCU read-side critical section for the last task
674 * that queued itself during or before the current expedited preemptible-RCU 674 * that queued itself during or before the current expedited preemptible-RCU
675 * grace period. This event is reported either to the rcu_node structure on 675 * grace period. This event is reported either to the rcu_node structure on
676 * which the task was queued or to one of that rcu_node structure's ancestors, 676 * which the task was queued or to one of that rcu_node structure's ancestors,
677 * recursively up the tree. (Calm down, calm down, we do the recursion 677 * recursively up the tree. (Calm down, calm down, we do the recursion
678 * iteratively!) 678 * iteratively!)
679 * 679 *
680 * Caller must hold sync_rcu_preempt_exp_mutex. 680 * Caller must hold sync_rcu_preempt_exp_mutex.
681 */ 681 */
682 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 682 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
683 { 683 {
684 unsigned long flags; 684 unsigned long flags;
685 unsigned long mask; 685 unsigned long mask;
686 686
687 raw_spin_lock_irqsave(&rnp->lock, flags); 687 raw_spin_lock_irqsave(&rnp->lock, flags);
688 for (;;) { 688 for (;;) {
689 if (!sync_rcu_preempt_exp_done(rnp)) 689 if (!sync_rcu_preempt_exp_done(rnp))
690 break; 690 break;
691 if (rnp->parent == NULL) { 691 if (rnp->parent == NULL) {
692 wake_up(&sync_rcu_preempt_exp_wq); 692 wake_up(&sync_rcu_preempt_exp_wq);
693 break; 693 break;
694 } 694 }
695 mask = rnp->grpmask; 695 mask = rnp->grpmask;
696 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 696 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
697 rnp = rnp->parent; 697 rnp = rnp->parent;
698 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 698 raw_spin_lock(&rnp->lock); /* irqs already disabled */
699 rnp->expmask &= ~mask; 699 rnp->expmask &= ~mask;
700 } 700 }
701 raw_spin_unlock_irqrestore(&rnp->lock, flags); 701 raw_spin_unlock_irqrestore(&rnp->lock, flags);
702 } 702 }
703 703
704 /* 704 /*
705 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 705 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
706 * grace period for the specified rcu_node structure. If there are no such 706 * grace period for the specified rcu_node structure. If there are no such
707 * tasks, report it up the rcu_node hierarchy. 707 * tasks, report it up the rcu_node hierarchy.
708 * 708 *
709 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. 709 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
710 */ 710 */
711 static void 711 static void
712 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 712 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
713 { 713 {
714 unsigned long flags;
714 int must_wait = 0; 715 int must_wait = 0;
715 716
716 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 717 raw_spin_lock_irqsave(&rnp->lock, flags);
717 if (!list_empty(&rnp->blkd_tasks)) { 718 if (list_empty(&rnp->blkd_tasks))
719 raw_spin_unlock_irqrestore(&rnp->lock, flags);
720 else {
718 rnp->exp_tasks = rnp->blkd_tasks.next; 721 rnp->exp_tasks = rnp->blkd_tasks.next;
719 rcu_initiate_boost(rnp); 722 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
720 must_wait = 1; 723 must_wait = 1;
721 } 724 }
722 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
723 if (!must_wait) 725 if (!must_wait)
724 rcu_report_exp_rnp(rsp, rnp); 726 rcu_report_exp_rnp(rsp, rnp);
725 } 727 }
726 728
727 /* 729 /*
728 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 730 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
729 * is to invoke synchronize_sched_expedited() to push all the tasks to 731 * is to invoke synchronize_sched_expedited() to push all the tasks to
730 * the ->blkd_tasks lists and wait for these lists to drain. 732 * the ->blkd_tasks lists and wait for these lists to drain.
731 */ 733 */
732 void synchronize_rcu_expedited(void) 734 void synchronize_rcu_expedited(void)
733 { 735 {
734 unsigned long flags; 736 unsigned long flags;
735 struct rcu_node *rnp; 737 struct rcu_node *rnp;
736 struct rcu_state *rsp = &rcu_preempt_state; 738 struct rcu_state *rsp = &rcu_preempt_state;
737 long snap; 739 long snap;
738 int trycount = 0; 740 int trycount = 0;
739 741
740 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 742 smp_mb(); /* Caller's modifications seen first by other CPUs. */
741 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 743 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
742 smp_mb(); /* Above access cannot bleed into critical section. */ 744 smp_mb(); /* Above access cannot bleed into critical section. */
743 745
744 /* 746 /*
745 * Acquire lock, falling back to synchronize_rcu() if too many 747 * Acquire lock, falling back to synchronize_rcu() if too many
746 * lock-acquisition failures. Of course, if someone does the 748 * lock-acquisition failures. Of course, if someone does the
747 * expedited grace period for us, just leave. 749 * expedited grace period for us, just leave.
748 */ 750 */
749 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 751 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
750 if (trycount++ < 10) 752 if (trycount++ < 10)
751 udelay(trycount * num_online_cpus()); 753 udelay(trycount * num_online_cpus());
752 else { 754 else {
753 synchronize_rcu(); 755 synchronize_rcu();
754 return; 756 return;
755 } 757 }
756 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 758 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
757 goto mb_ret; /* Others did our work for us. */ 759 goto mb_ret; /* Others did our work for us. */
758 } 760 }
759 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 761 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
760 goto unlock_mb_ret; /* Others did our work for us. */ 762 goto unlock_mb_ret; /* Others did our work for us. */
761 763
762 /* force all RCU readers onto ->blkd_tasks lists. */ 764 /* force all RCU readers onto ->blkd_tasks lists. */
763 synchronize_sched_expedited(); 765 synchronize_sched_expedited();
764 766
765 raw_spin_lock_irqsave(&rsp->onofflock, flags); 767 raw_spin_lock_irqsave(&rsp->onofflock, flags);
766 768
767 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 769 /* Initialize ->expmask for all non-leaf rcu_node structures. */
768 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 770 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
769 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 771 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
770 rnp->expmask = rnp->qsmaskinit; 772 rnp->expmask = rnp->qsmaskinit;
771 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 773 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
772 } 774 }
773 775
774 /* Snapshot current state of ->blkd_tasks lists. */ 776 /* Snapshot current state of ->blkd_tasks lists. */
775 rcu_for_each_leaf_node(rsp, rnp) 777 rcu_for_each_leaf_node(rsp, rnp)
776 sync_rcu_preempt_exp_init(rsp, rnp); 778 sync_rcu_preempt_exp_init(rsp, rnp);
777 if (NUM_RCU_NODES > 1) 779 if (NUM_RCU_NODES > 1)
778 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 780 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
779 781
780 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 782 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
781 783
782 /* Wait for snapshotted ->blkd_tasks lists to drain. */ 784 /* Wait for snapshotted ->blkd_tasks lists to drain. */
783 rnp = rcu_get_root(rsp); 785 rnp = rcu_get_root(rsp);
784 wait_event(sync_rcu_preempt_exp_wq, 786 wait_event(sync_rcu_preempt_exp_wq,
785 sync_rcu_preempt_exp_done(rnp)); 787 sync_rcu_preempt_exp_done(rnp));
786 788
787 /* Clean up and exit. */ 789 /* Clean up and exit. */
788 smp_mb(); /* ensure expedited GP seen before counter increment. */ 790 smp_mb(); /* ensure expedited GP seen before counter increment. */
789 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 791 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
790 unlock_mb_ret: 792 unlock_mb_ret:
791 mutex_unlock(&sync_rcu_preempt_exp_mutex); 793 mutex_unlock(&sync_rcu_preempt_exp_mutex);
792 mb_ret: 794 mb_ret:
793 smp_mb(); /* ensure subsequent action seen after grace period. */ 795 smp_mb(); /* ensure subsequent action seen after grace period. */
794 } 796 }
795 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 797 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
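A hedged illustration (not from the patch; foo_wait_for_readers() and its urgent flag are invented) of when a caller might choose the expedited variant over the normal one:

#include <linux/rcupdate.h>
#include <linux/types.h>

static void foo_wait_for_readers(bool urgent)
{
        if (urgent)
                synchronize_rcu_expedited();    /* low latency, but drives work on all CPUs */
        else
                synchronize_rcu();              /* cheaper; waits for a normal grace period */
}

The expedited path is comparatively expensive, since it pushes readers off their CPUs via synchronize_sched_expedited(), so it is not meant for common-case code.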
796 798
797 /* 799 /*
798 * Check to see if there is any immediate preemptible-RCU-related work 800 * Check to see if there is any immediate preemptible-RCU-related work
799 * to be done. 801 * to be done.
800 */ 802 */
801 static int rcu_preempt_pending(int cpu) 803 static int rcu_preempt_pending(int cpu)
802 { 804 {
803 return __rcu_pending(&rcu_preempt_state, 805 return __rcu_pending(&rcu_preempt_state,
804 &per_cpu(rcu_preempt_data, cpu)); 806 &per_cpu(rcu_preempt_data, cpu));
805 } 807 }
806 808
807 /* 809 /*
808 * Does preemptible RCU need the CPU to stay out of dynticks mode? 810 * Does preemptible RCU need the CPU to stay out of dynticks mode?
809 */ 811 */
810 static int rcu_preempt_needs_cpu(int cpu) 812 static int rcu_preempt_needs_cpu(int cpu)
811 { 813 {
812 return !!per_cpu(rcu_preempt_data, cpu).nxtlist; 814 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
813 } 815 }
814 816
815 /** 817 /**
816 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 818 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
817 */ 819 */
818 void rcu_barrier(void) 820 void rcu_barrier(void)
819 { 821 {
820 _rcu_barrier(&rcu_preempt_state, call_rcu); 822 _rcu_barrier(&rcu_preempt_state, call_rcu);
821 } 823 }
822 EXPORT_SYMBOL_GPL(rcu_barrier); 824 EXPORT_SYMBOL_GPL(rcu_barrier);
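A minimal module-teardown sketch (assumed, not from this patch; foo_unpublish_everything() and foo_module_exit() are hypothetical) showing why rcu_barrier() matters before code that posted callbacks goes away:

#include <linux/module.h>
#include <linux/rcupdate.h>

static void foo_unpublish_everything(void)
{
        /* Hypothetical: unhook every structure that could still queue
         * callbacks via call_rcu() once this returns. */
}

static void __exit foo_module_exit(void)
{
        foo_unpublish_everything();

        /* Wait for every callback already handed to call_rcu() to finish,
         * so none can run after this module's text has been unloaded. */
        rcu_barrier();
}
module_exit(foo_module_exit);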
823 825
824 /* 826 /*
825 * Initialize preemptible RCU's per-CPU data. 827 * Initialize preemptible RCU's per-CPU data.
826 */ 828 */
827 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 829 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
828 { 830 {
829 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); 831 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
830 } 832 }
831 833
832 /* 834 /*
833 * Move preemptible RCU's callbacks from dying CPU to other online CPU. 835 * Move preemptible RCU's callbacks from dying CPU to other online CPU.
834 */ 836 */
835 static void rcu_preempt_send_cbs_to_online(void) 837 static void rcu_preempt_send_cbs_to_online(void)
836 { 838 {
837 rcu_send_cbs_to_online(&rcu_preempt_state); 839 rcu_send_cbs_to_online(&rcu_preempt_state);
838 } 840 }
839 841
840 /* 842 /*
841 * Initialize preemptible RCU's state structures. 843 * Initialize preemptible RCU's state structures.
842 */ 844 */
843 static void __init __rcu_init_preempt(void) 845 static void __init __rcu_init_preempt(void)
844 { 846 {
845 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 847 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
846 } 848 }
847 849
848 /* 850 /*
849 * Check for a task exiting while in a preemptible-RCU read-side 851 * Check for a task exiting while in a preemptible-RCU read-side
850 * critical section, clean up if so. No need to issue warnings, 852 * critical section, clean up if so. No need to issue warnings,
851 * as debug_check_no_locks_held() already does this if lockdep 853 * as debug_check_no_locks_held() already does this if lockdep
852 * is enabled. 854 * is enabled.
853 */ 855 */
854 void exit_rcu(void) 856 void exit_rcu(void)
855 { 857 {
856 struct task_struct *t = current; 858 struct task_struct *t = current;
857 859
858 if (t->rcu_read_lock_nesting == 0) 860 if (t->rcu_read_lock_nesting == 0)
859 return; 861 return;
860 t->rcu_read_lock_nesting = 1; 862 t->rcu_read_lock_nesting = 1;
861 __rcu_read_unlock(); 863 __rcu_read_unlock();
862 } 864 }
863 865
864 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 866 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
865 867
866 static struct rcu_state *rcu_state = &rcu_sched_state; 868 static struct rcu_state *rcu_state = &rcu_sched_state;
867 869
868 /* 870 /*
869 * Tell them what RCU they are running. 871 * Tell them what RCU they are running.
870 */ 872 */
871 static void __init rcu_bootup_announce(void) 873 static void __init rcu_bootup_announce(void)
872 { 874 {
873 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 875 printk(KERN_INFO "Hierarchical RCU implementation.\n");
874 rcu_bootup_announce_oddness(); 876 rcu_bootup_announce_oddness();
875 } 877 }
876 878
877 /* 879 /*
878 * Return the number of RCU batches processed thus far for debug & stats. 880 * Return the number of RCU batches processed thus far for debug & stats.
879 */ 881 */
880 long rcu_batches_completed(void) 882 long rcu_batches_completed(void)
881 { 883 {
882 return rcu_batches_completed_sched(); 884 return rcu_batches_completed_sched();
883 } 885 }
884 EXPORT_SYMBOL_GPL(rcu_batches_completed); 886 EXPORT_SYMBOL_GPL(rcu_batches_completed);
885 887
886 /* 888 /*
887 * Force a quiescent state for RCU, which, because there is no preemptible 889 * Force a quiescent state for RCU, which, because there is no preemptible
888 * RCU, becomes the same as rcu-sched. 890 * RCU, becomes the same as rcu-sched.
889 */ 891 */
890 void rcu_force_quiescent_state(void) 892 void rcu_force_quiescent_state(void)
891 { 893 {
892 rcu_sched_force_quiescent_state(); 894 rcu_sched_force_quiescent_state();
893 } 895 }
894 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 896 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
895 897
896 /* 898 /*
897 * Because preemptible RCU does not exist, we never have to check for 899 * Because preemptible RCU does not exist, we never have to check for
898 * CPUs being in quiescent states. 900 * CPUs being in quiescent states.
899 */ 901 */
900 static void rcu_preempt_note_context_switch(int cpu) 902 static void rcu_preempt_note_context_switch(int cpu)
901 { 903 {
902 } 904 }
903 905
904 /* 906 /*
905 * Because preemptible RCU does not exist, there are never any preempted 907 * Because preemptible RCU does not exist, there are never any preempted
906 * RCU readers. 908 * RCU readers.
907 */ 909 */
908 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 910 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
909 { 911 {
910 return 0; 912 return 0;
911 } 913 }
912 914
913 #ifdef CONFIG_HOTPLUG_CPU 915 #ifdef CONFIG_HOTPLUG_CPU
914 916
915 /* Because preemptible RCU does not exist, no quieting of tasks. */ 917 /* Because preemptible RCU does not exist, no quieting of tasks. */
916 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 918 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
917 { 919 {
918 raw_spin_unlock_irqrestore(&rnp->lock, flags); 920 raw_spin_unlock_irqrestore(&rnp->lock, flags);
919 } 921 }
920 922
921 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 923 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
922 924
923 /* 925 /*
924 * Because preemptible RCU does not exist, we never have to check for 926 * Because preemptible RCU does not exist, we never have to check for
925 * tasks blocked within RCU read-side critical sections. 927 * tasks blocked within RCU read-side critical sections.
926 */ 928 */
927 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 929 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
928 { 930 {
929 } 931 }
930 932
931 /* 933 /*
932 * Because preemptible RCU does not exist, we never have to check for 934 * Because preemptible RCU does not exist, we never have to check for
933 * tasks blocked within RCU read-side critical sections. 935 * tasks blocked within RCU read-side critical sections.
934 */ 936 */
935 static void rcu_print_task_stall(struct rcu_node *rnp) 937 static void rcu_print_task_stall(struct rcu_node *rnp)
936 { 938 {
937 } 939 }
938 940
939 /* 941 /*
940 * Because preemptible RCU does not exist, there is no need to suppress 942 * Because preemptible RCU does not exist, there is no need to suppress
941 * its CPU stall warnings. 943 * its CPU stall warnings.
942 */ 944 */
943 static void rcu_preempt_stall_reset(void) 945 static void rcu_preempt_stall_reset(void)
944 { 946 {
945 } 947 }
946 948
947 /* 949 /*
948 * Because there is no preemptible RCU, there can be no readers blocked, 950 * Because there is no preemptible RCU, there can be no readers blocked,
949 * so there is no need to check for blocked tasks. So check only for 951 * so there is no need to check for blocked tasks. So check only for
950 * bogus qsmask values. 952 * bogus qsmask values.
951 */ 953 */
952 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 954 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
953 { 955 {
954 WARN_ON_ONCE(rnp->qsmask); 956 WARN_ON_ONCE(rnp->qsmask);
955 } 957 }
956 958
957 #ifdef CONFIG_HOTPLUG_CPU 959 #ifdef CONFIG_HOTPLUG_CPU
958 960
959 /* 961 /*
960 * Because preemptible RCU does not exist, it never needs to migrate 962 * Because preemptible RCU does not exist, it never needs to migrate
961 * tasks that were blocked within RCU read-side critical sections, and 963 * tasks that were blocked within RCU read-side critical sections, and
962 * such non-existent tasks cannot possibly have been blocking the current 964 * such non-existent tasks cannot possibly have been blocking the current
963 * grace period. 965 * grace period.
964 */ 966 */
965 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 967 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
966 struct rcu_node *rnp, 968 struct rcu_node *rnp,
967 struct rcu_data *rdp) 969 struct rcu_data *rdp)
968 { 970 {
969 return 0; 971 return 0;
970 } 972 }
971 973
972 /* 974 /*
973 * Because preemptible RCU does not exist, it never needs CPU-offline 975 * Because preemptible RCU does not exist, it never needs CPU-offline
974 * processing. 976 * processing.
975 */ 977 */
976 static void rcu_preempt_offline_cpu(int cpu) 978 static void rcu_preempt_offline_cpu(int cpu)
977 { 979 {
978 } 980 }
979 981
980 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 982 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
981 983
982 /* 984 /*
983 * Because preemptible RCU does not exist, it never has any callbacks 985 * Because preemptible RCU does not exist, it never has any callbacks
984 * to check. 986 * to check.
985 */ 987 */
986 static void rcu_preempt_check_callbacks(int cpu) 988 static void rcu_preempt_check_callbacks(int cpu)
987 { 989 {
988 } 990 }
989 991
990 /* 992 /*
991 * Because preemptible RCU does not exist, it never has any callbacks 993 * Because preemptible RCU does not exist, it never has any callbacks
992 * to process. 994 * to process.
993 */ 995 */
994 static void rcu_preempt_process_callbacks(void) 996 static void rcu_preempt_process_callbacks(void)
995 { 997 {
996 } 998 }
997 999
998 /* 1000 /*
999 * Wait for an rcu-preempt grace period, but make it happen quickly. 1001 * Wait for an rcu-preempt grace period, but make it happen quickly.
1000 * But because preemptible RCU does not exist, map to rcu-sched. 1002 * But because preemptible RCU does not exist, map to rcu-sched.
1001 */ 1003 */
1002 void synchronize_rcu_expedited(void) 1004 void synchronize_rcu_expedited(void)
1003 { 1005 {
1004 synchronize_sched_expedited(); 1006 synchronize_sched_expedited();
1005 } 1007 }
1006 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 1008 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1007 1009
1008 #ifdef CONFIG_HOTPLUG_CPU 1010 #ifdef CONFIG_HOTPLUG_CPU
1009 1011
1010 /* 1012 /*
1011 * Because preemptible RCU does not exist, there is never any need to 1013 * Because preemptible RCU does not exist, there is never any need to
1012 * report on tasks preempted in RCU read-side critical sections during 1014 * report on tasks preempted in RCU read-side critical sections during
1013 * expedited RCU grace periods. 1015 * expedited RCU grace periods.
1014 */ 1016 */
1015 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) 1017 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
1016 { 1018 {
1017 return; 1019 return;
1018 } 1020 }
1019 1021
1020 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1022 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1021 1023
1022 /* 1024 /*
1023 * Because preemptible RCU does not exist, it never has any work to do. 1025 * Because preemptible RCU does not exist, it never has any work to do.
1024 */ 1026 */
1025 static int rcu_preempt_pending(int cpu) 1027 static int rcu_preempt_pending(int cpu)
1026 { 1028 {
1027 return 0; 1029 return 0;
1028 } 1030 }
1029 1031
1030 /* 1032 /*
1031 * Because preemptible RCU does not exist, it never needs any CPU. 1033 * Because preemptible RCU does not exist, it never needs any CPU.
1032 */ 1034 */
1033 static int rcu_preempt_needs_cpu(int cpu) 1035 static int rcu_preempt_needs_cpu(int cpu)
1034 { 1036 {
1035 return 0; 1037 return 0;
1036 } 1038 }
1037 1039
1038 /* 1040 /*
1039 * Because preemptible RCU does not exist, rcu_barrier() is just 1041 * Because preemptible RCU does not exist, rcu_barrier() is just
1040 * another name for rcu_barrier_sched(). 1042 * another name for rcu_barrier_sched().
1041 */ 1043 */
1042 void rcu_barrier(void) 1044 void rcu_barrier(void)
1043 { 1045 {
1044 rcu_barrier_sched(); 1046 rcu_barrier_sched();
1045 } 1047 }
1046 EXPORT_SYMBOL_GPL(rcu_barrier); 1048 EXPORT_SYMBOL_GPL(rcu_barrier);
1047 1049
1048 /* 1050 /*
1049 * Because preemptible RCU does not exist, there is no per-CPU 1051 * Because preemptible RCU does not exist, there is no per-CPU
1050 * data to initialize. 1052 * data to initialize.
1051 */ 1053 */
1052 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 1054 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1053 { 1055 {
1054 } 1056 }
1055 1057
1056 /* 1058 /*
1057 * Because there is no preemptible RCU, there are no callbacks to move. 1059 * Because there is no preemptible RCU, there are no callbacks to move.
1058 */ 1060 */
1059 static void rcu_preempt_send_cbs_to_online(void) 1061 static void rcu_preempt_send_cbs_to_online(void)
1060 { 1062 {
1061 } 1063 }
1062 1064
1063 /* 1065 /*
1064 * Because preemptible RCU does not exist, it need not be initialized. 1066 * Because preemptible RCU does not exist, it need not be initialized.
1065 */ 1067 */
1066 static void __init __rcu_init_preempt(void) 1068 static void __init __rcu_init_preempt(void)
1067 { 1069 {
1068 } 1070 }
1069 1071
1070 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1072 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1071 1073
1072 #ifdef CONFIG_RCU_BOOST 1074 #ifdef CONFIG_RCU_BOOST
1073 1075
1074 #include "rtmutex_common.h" 1076 #include "rtmutex_common.h"
1075 1077
1076 #ifdef CONFIG_RCU_TRACE 1078 #ifdef CONFIG_RCU_TRACE
1077 1079
1078 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1080 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1079 { 1081 {
1080 if (list_empty(&rnp->blkd_tasks)) 1082 if (list_empty(&rnp->blkd_tasks))
1081 rnp->n_balk_blkd_tasks++; 1083 rnp->n_balk_blkd_tasks++;
1082 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 1084 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1083 rnp->n_balk_exp_gp_tasks++; 1085 rnp->n_balk_exp_gp_tasks++;
1084 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) 1086 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1085 rnp->n_balk_boost_tasks++; 1087 rnp->n_balk_boost_tasks++;
1086 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) 1088 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1087 rnp->n_balk_notblocked++; 1089 rnp->n_balk_notblocked++;
1088 else if (rnp->gp_tasks != NULL && 1090 else if (rnp->gp_tasks != NULL &&
1089 ULONG_CMP_LT(jiffies, rnp->boost_time)) 1091 ULONG_CMP_LT(jiffies, rnp->boost_time))
1090 rnp->n_balk_notyet++; 1092 rnp->n_balk_notyet++;
1091 else 1093 else
1092 rnp->n_balk_nos++; 1094 rnp->n_balk_nos++;
1093 } 1095 }
1094 1096
1095 #else /* #ifdef CONFIG_RCU_TRACE */ 1097 #else /* #ifdef CONFIG_RCU_TRACE */
1096 1098
1097 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1099 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1098 { 1100 {
1099 } 1101 }
1100 1102
1101 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1103 #endif /* #else #ifdef CONFIG_RCU_TRACE */
1102 1104
1103 /* 1105 /*
1104 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1106 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1105 * or ->boost_tasks, advancing the pointer to the next task in the 1107 * or ->boost_tasks, advancing the pointer to the next task in the
1106 * ->blkd_tasks list. 1108 * ->blkd_tasks list.
1107 * 1109 *
1108 * Note that irqs must be enabled: boosting the task can block. 1110 * Note that irqs must be enabled: boosting the task can block.
1109 * Returns 1 if there are more tasks needing to be boosted. 1111 * Returns 1 if there are more tasks needing to be boosted.
1110 */ 1112 */
1111 static int rcu_boost(struct rcu_node *rnp) 1113 static int rcu_boost(struct rcu_node *rnp)
1112 { 1114 {
1113 unsigned long flags; 1115 unsigned long flags;
1114 struct rt_mutex mtx; 1116 struct rt_mutex mtx;
1115 struct task_struct *t; 1117 struct task_struct *t;
1116 struct list_head *tb; 1118 struct list_head *tb;
1117 1119
1118 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1120 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1119 return 0; /* Nothing left to boost. */ 1121 return 0; /* Nothing left to boost. */
1120 1122
1121 raw_spin_lock_irqsave(&rnp->lock, flags); 1123 raw_spin_lock_irqsave(&rnp->lock, flags);
1122 1124
1123 /* 1125 /*
1124 * Recheck under the lock: all tasks in need of boosting 1126 * Recheck under the lock: all tasks in need of boosting
1125 * might exit their RCU read-side critical sections on their own. 1127 * might exit their RCU read-side critical sections on their own.
1126 */ 1128 */
1127 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { 1129 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1128 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1130 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1129 return 0; 1131 return 0;
1130 } 1132 }
1131 1133
1132 /* 1134 /*
1133 * Preferentially boost tasks blocking expedited grace periods. 1135 * Preferentially boost tasks blocking expedited grace periods.
1134 * This cannot starve the normal grace periods because a second 1136 * This cannot starve the normal grace periods because a second
1135 * expedited grace period must boost all blocked tasks, including 1137 * expedited grace period must boost all blocked tasks, including
1136 * those blocking the pre-existing normal grace period. 1138 * those blocking the pre-existing normal grace period.
1137 */ 1139 */
1138 if (rnp->exp_tasks != NULL) { 1140 if (rnp->exp_tasks != NULL) {
1139 tb = rnp->exp_tasks; 1141 tb = rnp->exp_tasks;
1140 rnp->n_exp_boosts++; 1142 rnp->n_exp_boosts++;
1141 } else { 1143 } else {
1142 tb = rnp->boost_tasks; 1144 tb = rnp->boost_tasks;
1143 rnp->n_normal_boosts++; 1145 rnp->n_normal_boosts++;
1144 } 1146 }
1145 rnp->n_tasks_boosted++; 1147 rnp->n_tasks_boosted++;
1146 1148
1147 /* 1149 /*
1148 * We boost task t by manufacturing an rt_mutex that appears to 1150 * We boost task t by manufacturing an rt_mutex that appears to
1149 * be held by task t. We leave a pointer to that rt_mutex where 1151 * be held by task t. We leave a pointer to that rt_mutex where
1150 * task t can find it, and task t will release the mutex when it 1152 * task t can find it, and task t will release the mutex when it
1151 * exits its outermost RCU read-side critical section. Then 1153 * exits its outermost RCU read-side critical section. Then
1152 * simply acquiring this artificial rt_mutex will boost task 1154 * simply acquiring this artificial rt_mutex will boost task
1153 * t's priority. (Thanks to tglx for suggesting this approach!) 1155 * t's priority. (Thanks to tglx for suggesting this approach!)
1154 * 1156 *
1155 * Note that task t must acquire rnp->lock to remove itself from 1157 * Note that task t must acquire rnp->lock to remove itself from
1156 * the ->blkd_tasks list, which it will do from exit() if from 1158 * the ->blkd_tasks list, which it will do from exit() if from
1157 * nowhere else. We therefore are guaranteed that task t will 1159 * nowhere else. We therefore are guaranteed that task t will
1158 * stay around at least until we drop rnp->lock. Note that 1160 * stay around at least until we drop rnp->lock. Note that
1159 * rnp->lock also resolves races between our priority boosting 1161 * rnp->lock also resolves races between our priority boosting
1160 * and task t's exiting its outermost RCU read-side critical 1162 * and task t's exiting its outermost RCU read-side critical
1161 * section. 1163 * section.
1162 */ 1164 */
1163 t = container_of(tb, struct task_struct, rcu_node_entry); 1165 t = container_of(tb, struct task_struct, rcu_node_entry);
1164 rt_mutex_init_proxy_locked(&mtx, t); 1166 rt_mutex_init_proxy_locked(&mtx, t);
1165 t->rcu_boost_mutex = &mtx; 1167 t->rcu_boost_mutex = &mtx;
1166 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; 1168 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
1167 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1169 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1168 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1170 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1169 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1171 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1170 1172
1171 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; 1173 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
1172 } 1174 }
1173 1175
1174 /* 1176 /*
1175 * Timer handler to initiate waking up of boost kthreads that 1177 * Timer handler to initiate waking up of boost kthreads that
1176 * have yielded the CPU due to excessive numbers of tasks to 1178 * have yielded the CPU due to excessive numbers of tasks to
1177 * boost. We wake up the per-rcu_node kthread, which in turn 1179 * boost. We wake up the per-rcu_node kthread, which in turn
1178 * will wake up the booster kthread. 1180 * will wake up the booster kthread.
1179 */ 1181 */
1180 static void rcu_boost_kthread_timer(unsigned long arg) 1182 static void rcu_boost_kthread_timer(unsigned long arg)
1181 { 1183 {
1182 unsigned long flags; 1184 invoke_rcu_node_kthread((struct rcu_node *)arg);
1183 struct rcu_node *rnp = (struct rcu_node *)arg;
1184
1185 raw_spin_lock_irqsave(&rnp->lock, flags);
1186 invoke_rcu_node_kthread(rnp);
1187 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1188 } 1185 }
1189 1186
1190 /* 1187 /*
1191 * Priority-boosting kthread. One per leaf rcu_node and one for the 1188 * Priority-boosting kthread. One per leaf rcu_node and one for the
1192 * root rcu_node. 1189 * root rcu_node.
1193 */ 1190 */
1194 static int rcu_boost_kthread(void *arg) 1191 static int rcu_boost_kthread(void *arg)
1195 { 1192 {
1196 struct rcu_node *rnp = (struct rcu_node *)arg; 1193 struct rcu_node *rnp = (struct rcu_node *)arg;
1197 int spincnt = 0; 1194 int spincnt = 0;
1198 int more2boost; 1195 int more2boost;
1199 1196
1200 for (;;) { 1197 for (;;) {
1201 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1198 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1202 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || 1199 wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
1203 rnp->exp_tasks || 1200 rnp->exp_tasks);
1204 kthread_should_stop());
1205 if (kthread_should_stop())
1206 break;
1207 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1201 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1208 more2boost = rcu_boost(rnp); 1202 more2boost = rcu_boost(rnp);
1209 if (more2boost) 1203 if (more2boost)
1210 spincnt++; 1204 spincnt++;
1211 else 1205 else
1212 spincnt = 0; 1206 spincnt = 0;
1213 if (spincnt > 10) { 1207 if (spincnt > 10) {
1214 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1208 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1215 spincnt = 0; 1209 spincnt = 0;
1216 } 1210 }
1217 } 1211 }
1218 rnp->boost_kthread_status = RCU_KTHREAD_STOPPED; 1212 /* NOTREACHED */
1219 return 0; 1213 return 0;
1220 } 1214 }
1221 1215
1222 /* 1216 /*
1223 * Check to see if it is time to start boosting RCU readers that are 1217 * Check to see if it is time to start boosting RCU readers that are
1224 * blocking the current grace period, and, if so, tell the per-rcu_node 1218 * blocking the current grace period, and, if so, tell the per-rcu_node
1225 * kthread to start boosting them. If there is an expedited grace 1219 * kthread to start boosting them. If there is an expedited grace
1226 * period in progress, it is always time to boost. 1220 * period in progress, it is always time to boost.
1227 * 1221 *
1228 * The caller must hold rnp->lock. 1222 * The caller must hold rnp->lock, which this function releases,
1223 * but irqs remain disabled. The ->boost_kthread_task is immortal,
1224 * so we don't need to worry about it going away.
1229 */ 1225 */
1230 static void rcu_initiate_boost(struct rcu_node *rnp) 1226 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1231 { 1227 {
1232 struct task_struct *t; 1228 struct task_struct *t;
1233 1229
1234 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1230 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1235 rnp->n_balk_exp_gp_tasks++; 1231 rnp->n_balk_exp_gp_tasks++;
1232 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1236 return; 1233 return;
1237 } 1234 }
1238 if (rnp->exp_tasks != NULL || 1235 if (rnp->exp_tasks != NULL ||
1239 (rnp->gp_tasks != NULL && 1236 (rnp->gp_tasks != NULL &&
1240 rnp->boost_tasks == NULL && 1237 rnp->boost_tasks == NULL &&
1241 rnp->qsmask == 0 && 1238 rnp->qsmask == 0 &&
1242 ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1239 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1243 if (rnp->exp_tasks == NULL) 1240 if (rnp->exp_tasks == NULL)
1244 rnp->boost_tasks = rnp->gp_tasks; 1241 rnp->boost_tasks = rnp->gp_tasks;
1242 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1245 t = rnp->boost_kthread_task; 1243 t = rnp->boost_kthread_task;
1246 if (t != NULL) 1244 if (t != NULL)
1247 wake_up_process(t); 1245 wake_up_process(t);
1248 } else 1246 } else {
1249 rcu_initiate_boost_trace(rnp); 1247 rcu_initiate_boost_trace(rnp);
1248 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1249 }
1250 } 1250 }
1251 1251
1252 /* 1252 /*
1253 * Set the affinity of the boost kthread. The CPU-hotplug locks are 1253 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1254 * held, so no one should be messing with the existence of the boost 1254 * held, so no one should be messing with the existence of the boost
1255 * kthread. 1255 * kthread.
1256 */ 1256 */
1257 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 1257 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1258 cpumask_var_t cm) 1258 cpumask_var_t cm)
1259 { 1259 {
1260 struct task_struct *t; 1260 struct task_struct *t;
1261 1261
1262 t = rnp->boost_kthread_task; 1262 t = rnp->boost_kthread_task;
1263 if (t != NULL) 1263 if (t != NULL)
1264 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); 1264 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1265 } 1265 }
1266 1266
1267 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1267 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1268 1268
1269 /* 1269 /*
1270 * Do priority-boost accounting for the start of a new grace period. 1270 * Do priority-boost accounting for the start of a new grace period.
1271 */ 1271 */
1272 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1272 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1273 { 1273 {
1274 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 1274 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1275 } 1275 }
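For a sense of scale, a worked example with assumed values (not specified by this patch):

/*
 * Assumed, purely illustrative values:
 *   CONFIG_RCU_BOOST_DELAY  = 500          (milliseconds)
 *   HZ                      = 250
 *   RCU_BOOST_DELAY_JIFFIES = DIV_ROUND_UP(500 * 250, 1000) = 125 jiffies
 *
 * so rcu_initiate_boost() above would start boosting roughly half a
 * second after rcu_preempt_boost_start_gp() records the start of the
 * grace period.
 */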
1276 1276
1277 /* 1277 /*
1278 * Initialize the RCU-boost waitqueue. 1278 * Initialize the RCU-boost waitqueue.
1279 */ 1279 */
1280 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) 1280 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1281 { 1281 {
1282 init_waitqueue_head(&rnp->boost_wq); 1282 init_waitqueue_head(&rnp->boost_wq);
1283 } 1283 }
1284 1284
1285 /* 1285 /*
1286 * Create an RCU-boost kthread for the specified node if one does not 1286 * Create an RCU-boost kthread for the specified node if one does not
1287 * already exist. We only create this kthread for preemptible RCU. 1287 * already exist. We only create this kthread for preemptible RCU.
1288 * Returns zero if all is well, a negated errno otherwise. 1288 * Returns zero if all is well, a negated errno otherwise.
1289 */ 1289 */
1290 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1290 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1291 struct rcu_node *rnp, 1291 struct rcu_node *rnp,
1292 int rnp_index) 1292 int rnp_index)
1293 { 1293 {
1294 unsigned long flags; 1294 unsigned long flags;
1295 struct sched_param sp; 1295 struct sched_param sp;
1296 struct task_struct *t; 1296 struct task_struct *t;
1297 1297
1298 if (&rcu_preempt_state != rsp) 1298 if (&rcu_preempt_state != rsp)
1299 return 0; 1299 return 0;
1300 if (rnp->boost_kthread_task != NULL) 1300 if (rnp->boost_kthread_task != NULL)
1301 return 0; 1301 return 0;
1302 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1302 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1303 "rcub%d", rnp_index); 1303 "rcub%d", rnp_index);
1304 if (IS_ERR(t)) 1304 if (IS_ERR(t))
1305 return PTR_ERR(t); 1305 return PTR_ERR(t);
1306 raw_spin_lock_irqsave(&rnp->lock, flags); 1306 raw_spin_lock_irqsave(&rnp->lock, flags);
1307 rnp->boost_kthread_task = t; 1307 rnp->boost_kthread_task = t;
1308 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1308 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1309 wake_up_process(t); 1309 wake_up_process(t);
1310 sp.sched_priority = RCU_KTHREAD_PRIO; 1310 sp.sched_priority = RCU_KTHREAD_PRIO;
1311 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1311 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1312 return 0; 1312 return 0;
1313 } 1313 }
1314 1314
1315 #ifdef CONFIG_HOTPLUG_CPU 1315 #else /* #ifdef CONFIG_RCU_BOOST */
1316 1316
1317 static void rcu_stop_boost_kthread(struct rcu_node *rnp) 1317 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1318 { 1318 {
1319 unsigned long flags;
1320 struct task_struct *t;
1321
1322 raw_spin_lock_irqsave(&rnp->lock, flags);
1323 t = rnp->boost_kthread_task;
1324 rnp->boost_kthread_task = NULL;
1325 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1319 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1326 if (t != NULL)
1327 kthread_stop(t);
1328 } 1320 }
1329 1321
1330 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1331
1332 #else /* #ifdef CONFIG_RCU_BOOST */
1333
1334 static void rcu_initiate_boost(struct rcu_node *rnp)
1335 {
1336 }
1337
1338 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 1322 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1339 cpumask_var_t cm) 1323 cpumask_var_t cm)
1340 { 1324 {
1341 } 1325 }
1342 1326
1343 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1327 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1344 { 1328 {
1345 } 1329 }
1346 1330
1347 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) 1331 static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
1348 { 1332 {
1349 } 1333 }
1350 1334
1351 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1335 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1352 struct rcu_node *rnp, 1336 struct rcu_node *rnp,
1353 int rnp_index) 1337 int rnp_index)
1354 { 1338 {
1355 return 0; 1339 return 0;
1356 } 1340 }
1357
1358 #ifdef CONFIG_HOTPLUG_CPU
1359
1360 static void rcu_stop_boost_kthread(struct rcu_node *rnp)
1361 {
1362 }
1363
1364 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1365 1341
1366 #endif /* #else #ifdef CONFIG_RCU_BOOST */ 1342 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1367 1343
1368 #ifndef CONFIG_SMP 1344 #ifndef CONFIG_SMP
1369 1345
1370 void synchronize_sched_expedited(void) 1346 void synchronize_sched_expedited(void)
1371 { 1347 {
1372 cond_resched(); 1348 cond_resched();
1373 } 1349 }
1374 EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 1350 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1375 1351
1376 #else /* #ifndef CONFIG_SMP */ 1352 #else /* #ifndef CONFIG_SMP */
1377 1353
1378 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); 1354 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
1379 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); 1355 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1380 1356
1381 static int synchronize_sched_expedited_cpu_stop(void *data) 1357 static int synchronize_sched_expedited_cpu_stop(void *data)
1382 { 1358 {
1383 /* 1359 /*
1384 * There must be a full memory barrier on each affected CPU 1360 * There must be a full memory barrier on each affected CPU
1385 * between the time that try_stop_cpus() is called and the 1361 * between the time that try_stop_cpus() is called and the
1386 * time that it returns. 1362 * time that it returns.
1387 * 1363 *
1388 * In the current initial implementation of cpu_stop, the 1364 * In the current initial implementation of cpu_stop, the
1389 * above condition is already met when control reaches 1365 * above condition is already met when control reaches
1390 * this point and the following smp_mb() is not strictly 1366 * this point and the following smp_mb() is not strictly
1391 * necessary. Do smp_mb() anyway for documentation and 1367 * necessary. Do smp_mb() anyway for documentation and
1392 * robustness against future implementation changes. 1368 * robustness against future implementation changes.
1393 */ 1369 */
1394 smp_mb(); /* See above comment block. */ 1370 smp_mb(); /* See above comment block. */
1395 return 0; 1371 return 0;
1396 } 1372 }
1397 1373
1398 /* 1374 /*
1399 * Wait for an rcu-sched grace period to elapse, but use a "big hammer" 1375 * Wait for an rcu-sched grace period to elapse, but use a "big hammer"
1400 * approach to force the grace period to end quickly. This consumes 1376 * approach to force the grace period to end quickly. This consumes
1401 * significant time on all CPUs, and is thus not recommended for 1377 * significant time on all CPUs, and is thus not recommended for
1402 * any sort of common-case code. 1378 * any sort of common-case code.
1403 * 1379 *
1404 * Note that it is illegal to call this function while holding any 1380 * Note that it is illegal to call this function while holding any
1405 * lock that is acquired by a CPU-hotplug notifier. Failing to 1381 * lock that is acquired by a CPU-hotplug notifier. Failing to
1406 * observe this restriction will result in deadlock. 1382 * observe this restriction will result in deadlock.
1407 * 1383 *
1408 * This implementation can be thought of as an application of ticket 1384 * This implementation can be thought of as an application of ticket
1409 * locking to RCU, with sync_sched_expedited_started and 1385 * locking to RCU, with sync_sched_expedited_started and
1410 * sync_sched_expedited_done taking on the roles of the halves 1386 * sync_sched_expedited_done taking on the roles of the halves
1411 * of the ticket-lock word. Each task atomically increments 1387 * of the ticket-lock word. Each task atomically increments
1412 * sync_sched_expedited_started upon entry, snapshotting the resulting value, 1388 * sync_sched_expedited_started upon entry, snapshotting the resulting value,
1413 * then attempts to stop all the CPUs. If this succeeds, then each 1389 * then attempts to stop all the CPUs. If this succeeds, then each
1414 * CPU will have executed a context switch, resulting in an RCU-sched 1390 * CPU will have executed a context switch, resulting in an RCU-sched
1415 * grace period. We are then done, so we use atomic_cmpxchg() to 1391 * grace period. We are then done, so we use atomic_cmpxchg() to
1416 * update sync_sched_expedited_done to match our snapshot -- but 1392 * update sync_sched_expedited_done to match our snapshot -- but
1417 * only if someone else has not already advanced past our snapshot. 1393 * only if someone else has not already advanced past our snapshot.
1418 * 1394 *
1419 * On the other hand, if try_stop_cpus() fails, we check the value 1395 * On the other hand, if try_stop_cpus() fails, we check the value
1420 * of sync_sched_expedited_done. If it has advanced past our 1396 * of sync_sched_expedited_done. If it has advanced past our
1421 * initial snapshot, then someone else must have forced a grace period 1397 * initial snapshot, then someone else must have forced a grace period
1422 * some time after we took our snapshot. In this case, our work is 1398 * some time after we took our snapshot. In this case, our work is
1423 * done for us, and we can simply return. Otherwise, we try again, 1399 * done for us, and we can simply return. Otherwise, we try again,
1424 * but keep our initial snapshot for purposes of checking for someone 1400 * but keep our initial snapshot for purposes of checking for someone
1425 * doing our work for us. 1401 * doing our work for us.
1426 * 1402 *
1427 * If we fail too many times in a row, we fall back to synchronize_sched(). 1403 * If we fail too many times in a row, we fall back to synchronize_sched().
1428 */ 1404 */
1429 void synchronize_sched_expedited(void) 1405 void synchronize_sched_expedited(void)
1430 { 1406 {
1431 int firstsnap, s, snap, trycount = 0; 1407 int firstsnap, s, snap, trycount = 0;
1432 1408
1433 /* Note that atomic_inc_return() implies full memory barrier. */ 1409 /* Note that atomic_inc_return() implies full memory barrier. */
1434 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); 1410 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
1435 get_online_cpus(); 1411 get_online_cpus();
1436 1412
1437 /* 1413 /*
1438 * Each pass through the following loop attempts to force a 1414 * Each pass through the following loop attempts to force a
1439 * context switch on each CPU. 1415 * context switch on each CPU.
1440 */ 1416 */
1441 while (try_stop_cpus(cpu_online_mask, 1417 while (try_stop_cpus(cpu_online_mask,
1442 synchronize_sched_expedited_cpu_stop, 1418 synchronize_sched_expedited_cpu_stop,
1443 NULL) == -EAGAIN) { 1419 NULL) == -EAGAIN) {
1444 put_online_cpus(); 1420 put_online_cpus();
1445 1421
1446 /* No joy, try again later. Or just synchronize_sched(). */ 1422 /* No joy, try again later. Or just synchronize_sched(). */
1447 if (trycount++ < 10) 1423 if (trycount++ < 10)
1448 udelay(trycount * num_online_cpus()); 1424 udelay(trycount * num_online_cpus());
1449 else { 1425 else {
1450 synchronize_sched(); 1426 synchronize_sched();
1451 return; 1427 return;
1452 } 1428 }
1453 1429
1454 /* Check to see if someone else did our work for us. */ 1430 /* Check to see if someone else did our work for us. */
1455 s = atomic_read(&sync_sched_expedited_done); 1431 s = atomic_read(&sync_sched_expedited_done);
1456 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { 1432 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
1457 smp_mb(); /* ensure test happens before caller kfree */ 1433 smp_mb(); /* ensure test happens before caller kfree */
1458 return; 1434 return;
1459 } 1435 }
1460 1436
1461 /* 1437 /*
1462 * Refetching sync_sched_expedited_started allows later 1438 * Refetching sync_sched_expedited_started allows later
1463 * callers to piggyback on our grace period. We subtract 1439 * callers to piggyback on our grace period. We subtract
1464 * 1 to get the same token that the last incrementer got. 1440 * 1 to get the same token that the last incrementer got.
1465 * We retry after they started, so our grace period works 1441 * We retry after they started, so our grace period works
1466 * for them, and they started after our first try, so their 1442 * for them, and they started after our first try, so their
1467 * grace period works for us. 1443 * grace period works for us.
1468 */ 1444 */
1469 get_online_cpus(); 1445 get_online_cpus();
1470 snap = atomic_read(&sync_sched_expedited_started) - 1; 1446 snap = atomic_read(&sync_sched_expedited_started) - 1;
1471 smp_mb(); /* ensure read is before try_stop_cpus(). */ 1447 smp_mb(); /* ensure read is before try_stop_cpus(). */
1472 } 1448 }
1473 1449
1474 /* 1450 /*
1475 * Everyone up to our most recent fetch is covered by our grace 1451 * Everyone up to our most recent fetch is covered by our grace
1476 * period. Update the counter, but only if our work is still 1452 * period. Update the counter, but only if our work is still
1477 * relevant -- which it won't be if someone who started later 1453 * relevant -- which it won't be if someone who started later
1478 * than we did beat us to the punch. 1454 * than we did beat us to the punch.
1479 */ 1455 */
1480 do { 1456 do {
1481 s = atomic_read(&sync_sched_expedited_done); 1457 s = atomic_read(&sync_sched_expedited_done);
1482 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { 1458 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
1483 smp_mb(); /* ensure test happens before caller kfree */ 1459 smp_mb(); /* ensure test happens before caller kfree */
1484 break; 1460 break;
1485 } 1461 }
1486 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); 1462 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
1487 1463
1488 put_online_cpus(); 1464 put_online_cpus();
1489 } 1465 }
1490 EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 1466 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
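The ticket-style bookkeeping described in the comment block above can be boiled down to a short sketch. This is not the implementation shown in the diff: expedite_all_cpus() is a hypothetical stand-in for try_stop_cpus(), SEQ_GE and the demo_started/demo_done counters are illustrative names, and the retry back-off, CPU-hotplug exclusion, and documentation barriers are omitted so that only the started/done counter logic remains.

#include <linux/atomic.h>
#include <linux/types.h>

/* Wrap-safe "a >= b" for sequence counters (same idea as UINT_CMP_GE). */
#define SEQ_GE(a, b)	((int)((unsigned)(a) - (unsigned)(b)) >= 0)

static atomic_t demo_started = ATOMIC_INIT(0);
static atomic_t demo_done = ATOMIC_INIT(0);

/* Hypothetical stand-in for try_stop_cpus(): pretend every CPU context-switched. */
static bool expedite_all_cpus(void)
{
	return true;
}

static void demo_expedited(void)
{
	int snap, s;

	/* Take a ticket; atomic_inc_return() implies a full memory barrier. */
	snap = atomic_inc_return(&demo_started);

	while (!expedite_all_cpus()) {
		/* A caller whose ticket is at least ours already finished: we are covered. */
		s = atomic_read(&demo_done);
		if (SEQ_GE(s, snap))
			return;
	}

	/* Record completion, unless a later caller already advanced past our ticket. */
	do {
		s = atomic_read(&demo_done);
		if (SEQ_GE(s, snap))
			break;
	} while (atomic_cmpxchg(&demo_done, s, snap) != s);
}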
1491 1467
1492 #endif /* #else #ifndef CONFIG_SMP */ 1468 #endif /* #else #ifndef CONFIG_SMP */
1493 1469
1494 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1470 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1495 1471
1496 /* 1472 /*
1497 * Check to see if any future RCU-related work will need to be done 1473 * Check to see if any future RCU-related work will need to be done
1498 * by the current CPU, even if none need be done immediately, returning 1474 * by the current CPU, even if none need be done immediately, returning
1499 * 1 if so. This function is part of the RCU implementation; it is -not- 1475 * 1 if so. This function is part of the RCU implementation; it is -not-
1500 * an exported member of the RCU API. 1476 * an exported member of the RCU API.
1501 * 1477 *
1502 * Because we have preemptible RCU, just check whether this CPU needs 1478 * Because we have preemptible RCU, just check whether this CPU needs
1503 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption 1479 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption
1504 * disabled in a most-likely vain attempt to cause RCU not to need this CPU. 1480 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
1505 */ 1481 */
1506 int rcu_needs_cpu(int cpu) 1482 int rcu_needs_cpu(int cpu)
1507 { 1483 {
1508 return rcu_needs_cpu_quick_check(cpu); 1484 return rcu_needs_cpu_quick_check(cpu);
1509 } 1485 }
1510 1486
1511 /* 1487 /*
1512 * Check to see if we need to continue a callback-flush operation to 1488 * Check to see if we need to continue a callback-flush operation to
1513 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle 1489 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
1514 * entry is not configured, so we never need to. 1490 * entry is not configured, so we never need to.
1515 */ 1491 */
1516 static void rcu_needs_cpu_flush(void) 1492 static void rcu_needs_cpu_flush(void)
1517 { 1493 {
1518 } 1494 }
1519 1495
1520 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1496 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1521 1497
1522 #define RCU_NEEDS_CPU_FLUSHES 5 1498 #define RCU_NEEDS_CPU_FLUSHES 5
1523 static DEFINE_PER_CPU(int, rcu_dyntick_drain); 1499 static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1524 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 1500 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1525 1501
1526 /* 1502 /*
1527 * Check to see if any future RCU-related work will need to be done 1503 * Check to see if any future RCU-related work will need to be done
1528 * by the current CPU, even if none need be done immediately, returning 1504 * by the current CPU, even if none need be done immediately, returning
1529 * 1 if so. This function is part of the RCU implementation; it is -not- 1505 * 1 if so. This function is part of the RCU implementation; it is -not-
1530 * an exported member of the RCU API. 1506 * an exported member of the RCU API.
1531 * 1507 *
1532 * Because we are not supporting preemptible RCU, attempt to accelerate 1508 * Because we are not supporting preemptible RCU, attempt to accelerate
1533 * any current grace periods so that RCU no longer needs this CPU, but 1509 * any current grace periods so that RCU no longer needs this CPU, but
1534 * only if all other CPUs are already in dynticks-idle mode. This will 1510 * only if all other CPUs are already in dynticks-idle mode. This will
1535 * allow the CPU cores to be powered down immediately, as opposed to after 1511 * allow the CPU cores to be powered down immediately, as opposed to after
1536 * waiting many milliseconds for grace periods to elapse. 1512 * waiting many milliseconds for grace periods to elapse.
1537 * 1513 *
1538 * Because it is not legal to invoke rcu_process_callbacks() with irqs 1514 * Because it is not legal to invoke rcu_process_callbacks() with irqs
1539 * disabled, we do one pass of force_quiescent_state(), then do an 1515 * disabled, we do one pass of force_quiescent_state(), then do an
1540 * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked 1516 * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
1541 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. 1517 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
1542 */ 1518 */
1543 int rcu_needs_cpu(int cpu) 1519 int rcu_needs_cpu(int cpu)
1544 { 1520 {
1545 int c = 0; 1521 int c = 0;
1546 int snap; 1522 int snap;
1547 int thatcpu; 1523 int thatcpu;
1548 1524
1549 /* Check for being in the holdoff period. */ 1525 /* Check for being in the holdoff period. */
1550 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) 1526 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1551 return rcu_needs_cpu_quick_check(cpu); 1527 return rcu_needs_cpu_quick_check(cpu);
1552 1528
1553 /* Don't bother unless we are the last non-dyntick-idle CPU. */ 1529 /* Don't bother unless we are the last non-dyntick-idle CPU. */
1554 for_each_online_cpu(thatcpu) { 1530 for_each_online_cpu(thatcpu) {
1555 if (thatcpu == cpu) 1531 if (thatcpu == cpu)
1556 continue; 1532 continue;
1557 snap = atomic_add_return(0, &per_cpu(rcu_dynticks, 1533 snap = atomic_add_return(0, &per_cpu(rcu_dynticks,
1558 thatcpu).dynticks); 1534 thatcpu).dynticks);
1559 smp_mb(); /* Order sampling of snap with end of grace period. */ 1535 smp_mb(); /* Order sampling of snap with end of grace period. */
1560 if ((snap & 0x1) != 0) { 1536 if ((snap & 0x1) != 0) {
1561 per_cpu(rcu_dyntick_drain, cpu) = 0; 1537 per_cpu(rcu_dyntick_drain, cpu) = 0;
1562 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 1538 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
1563 return rcu_needs_cpu_quick_check(cpu); 1539 return rcu_needs_cpu_quick_check(cpu);
1564 } 1540 }
1565 } 1541 }
1566 1542
1567 /* Check and update the rcu_dyntick_drain sequencing. */ 1543 /* Check and update the rcu_dyntick_drain sequencing. */
1568 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { 1544 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1569 /* First time through, initialize the counter. */ 1545 /* First time through, initialize the counter. */
1570 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; 1546 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
1571 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 1547 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
1572 /* We have hit the limit, so time to give up. */ 1548 /* We have hit the limit, so time to give up. */
1573 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 1549 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1574 return rcu_needs_cpu_quick_check(cpu); 1550 return rcu_needs_cpu_quick_check(cpu);
1575 } 1551 }
1576 1552
1577 /* Do one step pushing remaining RCU callbacks through. */ 1553 /* Do one step pushing remaining RCU callbacks through. */
1578 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 1554 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
1579 rcu_sched_qs(cpu); 1555 rcu_sched_qs(cpu);
1580 force_quiescent_state(&rcu_sched_state, 0); 1556 force_quiescent_state(&rcu_sched_state, 0);
1581 c = c || per_cpu(rcu_sched_data, cpu).nxtlist; 1557 c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
1582 } 1558 }
1583 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 1559 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
1584 rcu_bh_qs(cpu); 1560 rcu_bh_qs(cpu);
1585 force_quiescent_state(&rcu_bh_state, 0); 1561 force_quiescent_state(&rcu_bh_state, 0);
1586 c = c || per_cpu(rcu_bh_data, cpu).nxtlist; 1562 c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
1587 } 1563 }
1588 1564
1589 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1565 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1590 if (c) 1566 if (c)
1591 invoke_rcu_cpu_kthread(); 1567 invoke_rcu_cpu_kthread();
1592 return c; 1568 return c;
1593 } 1569 }
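The drain/holdoff bookkeeping above amounts to a small per-CPU rate limiter: up to RCU_NEEDS_CPU_FLUSHES attempts, then a holdoff until the jiffies counter moves on. A stripped-down sketch of just that gating, with a hypothetical try_flush_callbacks() standing in for the force_quiescent_state() passes and the demo_* names invented for illustration:

#include <linux/jiffies.h>
#include <linux/percpu.h>
#include <linux/types.h>

#define DEMO_FLUSHES	5	/* mirrors RCU_NEEDS_CPU_FLUSHES */

static DEFINE_PER_CPU(int, demo_drain);
static DEFINE_PER_CPU(unsigned long, demo_holdoff);

/* Hypothetical: one pass at pushing this CPU's callbacks through. */
static bool try_flush_callbacks(int cpu)
{
	return false;	/* pretend nothing remains pending in this sketch */
}

static bool demo_keep_trying(int cpu)
{
	/* Already gave up during this jiffy?  Stay backed off. */
	if (per_cpu(demo_holdoff, cpu) == jiffies)
		return false;

	/* Arm the counter on the first attempt, count down on later ones. */
	if (per_cpu(demo_drain, cpu) <= 0) {
		per_cpu(demo_drain, cpu) = DEMO_FLUSHES;
	} else if (--per_cpu(demo_drain, cpu) <= 0) {
		per_cpu(demo_holdoff, cpu) = jiffies;	/* out of attempts */
		return false;
	}

	return try_flush_callbacks(cpu);
}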
1594 1570
1595 /* 1571 /*
1596 * Check to see if we need to continue a callback-flush operation to 1572 * Check to see if we need to continue a callback-flush operation to
1597 * allow the last CPU to enter dyntick-idle mode. 1573 * allow the last CPU to enter dyntick-idle mode.
1598 */ 1574 */
1599 static void rcu_needs_cpu_flush(void) 1575 static void rcu_needs_cpu_flush(void)
1600 { 1576 {
1601 int cpu = smp_processor_id(); 1577 int cpu = smp_processor_id();
1602 unsigned long flags; 1578 unsigned long flags;
1603 1579