Commit 7cb92499000e3c86dae653077b1465458a039ef6

Authored by Paul E. McKenney
Committed by Paul E. McKenney
1 parent 3842a0832a

rcu: Permit dyntick-idle with callbacks pending

The current implementation of RCU_FAST_NO_HZ prevents CPUs from entering
dyntick-idle state if they have RCU callbacks pending.  Unfortunately,
this has the side-effect of often preventing them from entering this
state, especially if at least one other CPU is not in dyntick-idle state.
However, the resulting per-tick wakeup is wasteful in many cases: if the
CPU has already fully responded to the current RCU grace period, there
will be nothing for it to do until this grace period ends, which will
frequently take several jiffies.

This commit therefore permits a CPU that has done everything that the
current grace period has asked of it (rcu_pending() == 0) even if it
still as RCU callbacks pending.  However, such a CPU posts a timer to
wake it up several jiffies later (6 jiffies, based on experience with
grace-period lengths).  This wakeup is required to handle situations
that can result in all CPUs being in dyntick-idle mode, thus failing
to ever complete the current grace period.  If a CPU wakes up before
the timer goes off, then it cancels that timer, thus avoiding spurious
wakeups.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Showing 4 changed files with 78 additions and 5 deletions Inline Diff

include/trace/events/rcu.h
1 #undef TRACE_SYSTEM 1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM rcu 2 #define TRACE_SYSTEM rcu
3 3
4 #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) 4 #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ)
5 #define _TRACE_RCU_H 5 #define _TRACE_RCU_H
6 6
7 #include <linux/tracepoint.h> 7 #include <linux/tracepoint.h>
8 8
9 /* 9 /*
10 * Tracepoint for start/end markers used for utilization calculations. 10 * Tracepoint for start/end markers used for utilization calculations.
11 * By convention, the string is of the following forms: 11 * By convention, the string is of the following forms:
12 * 12 *
13 * "Start <activity>" -- Mark the start of the specified activity, 13 * "Start <activity>" -- Mark the start of the specified activity,
14 * such as "context switch". Nesting is permitted. 14 * such as "context switch". Nesting is permitted.
15 * "End <activity>" -- Mark the end of the specified activity. 15 * "End <activity>" -- Mark the end of the specified activity.
16 * 16 *
17 * An "@" character within "<activity>" is a comment character: Data 17 * An "@" character within "<activity>" is a comment character: Data
18 * reduction scripts will ignore the "@" and the remainder of the line. 18 * reduction scripts will ignore the "@" and the remainder of the line.
19 */ 19 */
20 TRACE_EVENT(rcu_utilization, 20 TRACE_EVENT(rcu_utilization,
21 21
22 TP_PROTO(char *s), 22 TP_PROTO(char *s),
23 23
24 TP_ARGS(s), 24 TP_ARGS(s),
25 25
26 TP_STRUCT__entry( 26 TP_STRUCT__entry(
27 __field(char *, s) 27 __field(char *, s)
28 ), 28 ),
29 29
30 TP_fast_assign( 30 TP_fast_assign(
31 __entry->s = s; 31 __entry->s = s;
32 ), 32 ),
33 33
34 TP_printk("%s", __entry->s) 34 TP_printk("%s", __entry->s)
35 ); 35 );
36 36
37 #ifdef CONFIG_RCU_TRACE 37 #ifdef CONFIG_RCU_TRACE
38 38
39 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 39 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
40 40
41 /* 41 /*
42 * Tracepoint for grace-period events: starting and ending a grace 42 * Tracepoint for grace-period events: starting and ending a grace
43 * period ("start" and "end", respectively), a CPU noting the start 43 * period ("start" and "end", respectively), a CPU noting the start
44 * of a new grace period or the end of an old grace period ("cpustart" 44 * of a new grace period or the end of an old grace period ("cpustart"
45 * and "cpuend", respectively), a CPU passing through a quiescent 45 * and "cpuend", respectively), a CPU passing through a quiescent
46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" 46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
47 * and "cpuofl", respectively), and a CPU being kicked for being too 47 * and "cpuofl", respectively), and a CPU being kicked for being too
48 * long in dyntick-idle mode ("kick"). 48 * long in dyntick-idle mode ("kick").
49 */ 49 */
50 TRACE_EVENT(rcu_grace_period, 50 TRACE_EVENT(rcu_grace_period,
51 51
52 TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), 52 TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent),
53 53
54 TP_ARGS(rcuname, gpnum, gpevent), 54 TP_ARGS(rcuname, gpnum, gpevent),
55 55
56 TP_STRUCT__entry( 56 TP_STRUCT__entry(
57 __field(char *, rcuname) 57 __field(char *, rcuname)
58 __field(unsigned long, gpnum) 58 __field(unsigned long, gpnum)
59 __field(char *, gpevent) 59 __field(char *, gpevent)
60 ), 60 ),
61 61
62 TP_fast_assign( 62 TP_fast_assign(
63 __entry->rcuname = rcuname; 63 __entry->rcuname = rcuname;
64 __entry->gpnum = gpnum; 64 __entry->gpnum = gpnum;
65 __entry->gpevent = gpevent; 65 __entry->gpevent = gpevent;
66 ), 66 ),
67 67
68 TP_printk("%s %lu %s", 68 TP_printk("%s %lu %s",
69 __entry->rcuname, __entry->gpnum, __entry->gpevent) 69 __entry->rcuname, __entry->gpnum, __entry->gpevent)
70 ); 70 );
71 71
72 /* 72 /*
73 * Tracepoint for grace-period-initialization events. These are 73 * Tracepoint for grace-period-initialization events. These are
74 * distinguished by the type of RCU, the new grace-period number, the 74 * distinguished by the type of RCU, the new grace-period number, the
75 * rcu_node structure level, the starting and ending CPU covered by the 75 * rcu_node structure level, the starting and ending CPU covered by the
76 * rcu_node structure, and the mask of CPUs that will be waited for. 76 * rcu_node structure, and the mask of CPUs that will be waited for.
77 * All but the type of RCU are extracted from the rcu_node structure. 77 * All but the type of RCU are extracted from the rcu_node structure.
78 */ 78 */
79 TRACE_EVENT(rcu_grace_period_init, 79 TRACE_EVENT(rcu_grace_period_init,
80 80
81 TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, 81 TP_PROTO(char *rcuname, unsigned long gpnum, u8 level,
82 int grplo, int grphi, unsigned long qsmask), 82 int grplo, int grphi, unsigned long qsmask),
83 83
84 TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), 84 TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask),
85 85
86 TP_STRUCT__entry( 86 TP_STRUCT__entry(
87 __field(char *, rcuname) 87 __field(char *, rcuname)
88 __field(unsigned long, gpnum) 88 __field(unsigned long, gpnum)
89 __field(u8, level) 89 __field(u8, level)
90 __field(int, grplo) 90 __field(int, grplo)
91 __field(int, grphi) 91 __field(int, grphi)
92 __field(unsigned long, qsmask) 92 __field(unsigned long, qsmask)
93 ), 93 ),
94 94
95 TP_fast_assign( 95 TP_fast_assign(
96 __entry->rcuname = rcuname; 96 __entry->rcuname = rcuname;
97 __entry->gpnum = gpnum; 97 __entry->gpnum = gpnum;
98 __entry->level = level; 98 __entry->level = level;
99 __entry->grplo = grplo; 99 __entry->grplo = grplo;
100 __entry->grphi = grphi; 100 __entry->grphi = grphi;
101 __entry->qsmask = qsmask; 101 __entry->qsmask = qsmask;
102 ), 102 ),
103 103
104 TP_printk("%s %lu %u %d %d %lx", 104 TP_printk("%s %lu %u %d %d %lx",
105 __entry->rcuname, __entry->gpnum, __entry->level, 105 __entry->rcuname, __entry->gpnum, __entry->level,
106 __entry->grplo, __entry->grphi, __entry->qsmask) 106 __entry->grplo, __entry->grphi, __entry->qsmask)
107 ); 107 );
108 108
109 /* 109 /*
110 * Tracepoint for tasks blocking within preemptible-RCU read-side 110 * Tracepoint for tasks blocking within preemptible-RCU read-side
111 * critical sections. Track the type of RCU (which one day might 111 * critical sections. Track the type of RCU (which one day might
112 * include SRCU), the grace-period number that the task is blocking 112 * include SRCU), the grace-period number that the task is blocking
113 * (the current or the next), and the task's PID. 113 * (the current or the next), and the task's PID.
114 */ 114 */
115 TRACE_EVENT(rcu_preempt_task, 115 TRACE_EVENT(rcu_preempt_task,
116 116
117 TP_PROTO(char *rcuname, int pid, unsigned long gpnum), 117 TP_PROTO(char *rcuname, int pid, unsigned long gpnum),
118 118
119 TP_ARGS(rcuname, pid, gpnum), 119 TP_ARGS(rcuname, pid, gpnum),
120 120
121 TP_STRUCT__entry( 121 TP_STRUCT__entry(
122 __field(char *, rcuname) 122 __field(char *, rcuname)
123 __field(unsigned long, gpnum) 123 __field(unsigned long, gpnum)
124 __field(int, pid) 124 __field(int, pid)
125 ), 125 ),
126 126
127 TP_fast_assign( 127 TP_fast_assign(
128 __entry->rcuname = rcuname; 128 __entry->rcuname = rcuname;
129 __entry->gpnum = gpnum; 129 __entry->gpnum = gpnum;
130 __entry->pid = pid; 130 __entry->pid = pid;
131 ), 131 ),
132 132
133 TP_printk("%s %lu %d", 133 TP_printk("%s %lu %d",
134 __entry->rcuname, __entry->gpnum, __entry->pid) 134 __entry->rcuname, __entry->gpnum, __entry->pid)
135 ); 135 );
136 136
137 /* 137 /*
138 * Tracepoint for tasks that blocked within a given preemptible-RCU 138 * Tracepoint for tasks that blocked within a given preemptible-RCU
139 * read-side critical section exiting that critical section. Track the 139 * read-side critical section exiting that critical section. Track the
140 * type of RCU (which one day might include SRCU) and the task's PID. 140 * type of RCU (which one day might include SRCU) and the task's PID.
141 */ 141 */
142 TRACE_EVENT(rcu_unlock_preempted_task, 142 TRACE_EVENT(rcu_unlock_preempted_task,
143 143
144 TP_PROTO(char *rcuname, unsigned long gpnum, int pid), 144 TP_PROTO(char *rcuname, unsigned long gpnum, int pid),
145 145
146 TP_ARGS(rcuname, gpnum, pid), 146 TP_ARGS(rcuname, gpnum, pid),
147 147
148 TP_STRUCT__entry( 148 TP_STRUCT__entry(
149 __field(char *, rcuname) 149 __field(char *, rcuname)
150 __field(unsigned long, gpnum) 150 __field(unsigned long, gpnum)
151 __field(int, pid) 151 __field(int, pid)
152 ), 152 ),
153 153
154 TP_fast_assign( 154 TP_fast_assign(
155 __entry->rcuname = rcuname; 155 __entry->rcuname = rcuname;
156 __entry->gpnum = gpnum; 156 __entry->gpnum = gpnum;
157 __entry->pid = pid; 157 __entry->pid = pid;
158 ), 158 ),
159 159
160 TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) 160 TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid)
161 ); 161 );
162 162
163 /* 163 /*
164 * Tracepoint for quiescent-state-reporting events. These are 164 * Tracepoint for quiescent-state-reporting events. These are
165 * distinguished by the type of RCU, the grace-period number, the 165 * distinguished by the type of RCU, the grace-period number, the
166 * mask of quiescent lower-level entities, the rcu_node structure level, 166 * mask of quiescent lower-level entities, the rcu_node structure level,
167 * the starting and ending CPU covered by the rcu_node structure, and 167 * the starting and ending CPU covered by the rcu_node structure, and
168 * whether there are any blocked tasks blocking the current grace period. 168 * whether there are any blocked tasks blocking the current grace period.
169 * All but the type of RCU are extracted from the rcu_node structure. 169 * All but the type of RCU are extracted from the rcu_node structure.
170 */ 170 */
171 TRACE_EVENT(rcu_quiescent_state_report, 171 TRACE_EVENT(rcu_quiescent_state_report,
172 172
173 TP_PROTO(char *rcuname, unsigned long gpnum, 173 TP_PROTO(char *rcuname, unsigned long gpnum,
174 unsigned long mask, unsigned long qsmask, 174 unsigned long mask, unsigned long qsmask,
175 u8 level, int grplo, int grphi, int gp_tasks), 175 u8 level, int grplo, int grphi, int gp_tasks),
176 176
177 TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), 177 TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks),
178 178
179 TP_STRUCT__entry( 179 TP_STRUCT__entry(
180 __field(char *, rcuname) 180 __field(char *, rcuname)
181 __field(unsigned long, gpnum) 181 __field(unsigned long, gpnum)
182 __field(unsigned long, mask) 182 __field(unsigned long, mask)
183 __field(unsigned long, qsmask) 183 __field(unsigned long, qsmask)
184 __field(u8, level) 184 __field(u8, level)
185 __field(int, grplo) 185 __field(int, grplo)
186 __field(int, grphi) 186 __field(int, grphi)
187 __field(u8, gp_tasks) 187 __field(u8, gp_tasks)
188 ), 188 ),
189 189
190 TP_fast_assign( 190 TP_fast_assign(
191 __entry->rcuname = rcuname; 191 __entry->rcuname = rcuname;
192 __entry->gpnum = gpnum; 192 __entry->gpnum = gpnum;
193 __entry->mask = mask; 193 __entry->mask = mask;
194 __entry->qsmask = qsmask; 194 __entry->qsmask = qsmask;
195 __entry->level = level; 195 __entry->level = level;
196 __entry->grplo = grplo; 196 __entry->grplo = grplo;
197 __entry->grphi = grphi; 197 __entry->grphi = grphi;
198 __entry->gp_tasks = gp_tasks; 198 __entry->gp_tasks = gp_tasks;
199 ), 199 ),
200 200
201 TP_printk("%s %lu %lx>%lx %u %d %d %u", 201 TP_printk("%s %lu %lx>%lx %u %d %d %u",
202 __entry->rcuname, __entry->gpnum, 202 __entry->rcuname, __entry->gpnum,
203 __entry->mask, __entry->qsmask, __entry->level, 203 __entry->mask, __entry->qsmask, __entry->level,
204 __entry->grplo, __entry->grphi, __entry->gp_tasks) 204 __entry->grplo, __entry->grphi, __entry->gp_tasks)
205 ); 205 );
206 206
207 /* 207 /*
208 * Tracepoint for quiescent states detected by force_quiescent_state(). 208 * Tracepoint for quiescent states detected by force_quiescent_state().
209 * These trace events include the type of RCU, the grace-period number 209 * These trace events include the type of RCU, the grace-period number
210 * that was blocked by the CPU, the CPU itself, and the type of quiescent 210 * that was blocked by the CPU, the CPU itself, and the type of quiescent
211 * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, 211 * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline,
212 * or "kick" when kicking a CPU that has been in dyntick-idle mode for 212 * or "kick" when kicking a CPU that has been in dyntick-idle mode for
213 * too long. 213 * too long.
214 */ 214 */
215 TRACE_EVENT(rcu_fqs, 215 TRACE_EVENT(rcu_fqs,
216 216
217 TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), 217 TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent),
218 218
219 TP_ARGS(rcuname, gpnum, cpu, qsevent), 219 TP_ARGS(rcuname, gpnum, cpu, qsevent),
220 220
221 TP_STRUCT__entry( 221 TP_STRUCT__entry(
222 __field(char *, rcuname) 222 __field(char *, rcuname)
223 __field(unsigned long, gpnum) 223 __field(unsigned long, gpnum)
224 __field(int, cpu) 224 __field(int, cpu)
225 __field(char *, qsevent) 225 __field(char *, qsevent)
226 ), 226 ),
227 227
228 TP_fast_assign( 228 TP_fast_assign(
229 __entry->rcuname = rcuname; 229 __entry->rcuname = rcuname;
230 __entry->gpnum = gpnum; 230 __entry->gpnum = gpnum;
231 __entry->cpu = cpu; 231 __entry->cpu = cpu;
232 __entry->qsevent = qsevent; 232 __entry->qsevent = qsevent;
233 ), 233 ),
234 234
235 TP_printk("%s %lu %d %s", 235 TP_printk("%s %lu %d %s",
236 __entry->rcuname, __entry->gpnum, 236 __entry->rcuname, __entry->gpnum,
237 __entry->cpu, __entry->qsevent) 237 __entry->cpu, __entry->qsevent)
238 ); 238 );
239 239
240 #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 240 #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */
241 241
242 /* 242 /*
243 * Tracepoint for dyntick-idle entry/exit events. These take a string 243 * Tracepoint for dyntick-idle entry/exit events. These take a string
244 * as argument: "Start" for entering dyntick-idle mode, "End" for 244 * as argument: "Start" for entering dyntick-idle mode, "End" for
245 * leaving it, "--=" for events moving towards idle, and "++=" for events 245 * leaving it, "--=" for events moving towards idle, and "++=" for events
246 * moving away from idle. "Error on entry: not idle task" and "Error on 246 * moving away from idle. "Error on entry: not idle task" and "Error on
247 * exit: not idle task" indicate that a non-idle task is erroneously 247 * exit: not idle task" indicate that a non-idle task is erroneously
248 * toying with the idle loop. 248 * toying with the idle loop.
249 * 249 *
250 * These events also take a pair of numbers, which indicate the nesting 250 * These events also take a pair of numbers, which indicate the nesting
251 * depth before and after the event of interest. Note that task-related 251 * depth before and after the event of interest. Note that task-related
252 * events use the upper bits of each number, while interrupt-related 252 * events use the upper bits of each number, while interrupt-related
253 * events use the lower bits. 253 * events use the lower bits.
254 */ 254 */
255 TRACE_EVENT(rcu_dyntick, 255 TRACE_EVENT(rcu_dyntick,
256 256
257 TP_PROTO(char *polarity, long long oldnesting, long long newnesting), 257 TP_PROTO(char *polarity, long long oldnesting, long long newnesting),
258 258
259 TP_ARGS(polarity, oldnesting, newnesting), 259 TP_ARGS(polarity, oldnesting, newnesting),
260 260
261 TP_STRUCT__entry( 261 TP_STRUCT__entry(
262 __field(char *, polarity) 262 __field(char *, polarity)
263 __field(long long, oldnesting) 263 __field(long long, oldnesting)
264 __field(long long, newnesting) 264 __field(long long, newnesting)
265 ), 265 ),
266 266
267 TP_fast_assign( 267 TP_fast_assign(
268 __entry->polarity = polarity; 268 __entry->polarity = polarity;
269 __entry->oldnesting = oldnesting; 269 __entry->oldnesting = oldnesting;
270 __entry->newnesting = newnesting; 270 __entry->newnesting = newnesting;
271 ), 271 ),
272 272
273 TP_printk("%s %llx %llx", __entry->polarity, 273 TP_printk("%s %llx %llx", __entry->polarity,
274 __entry->oldnesting, __entry->newnesting) 274 __entry->oldnesting, __entry->newnesting)
275 ); 275 );
276 276
277 /* 277 /*
278 * Tracepoint for RCU preparation for idle, the goal being to get RCU 278 * Tracepoint for RCU preparation for idle, the goal being to get RCU
279 * processing done so that the current CPU can shut off its scheduling 279 * processing done so that the current CPU can shut off its scheduling
280 * clock and enter dyntick-idle mode. One way to accomplish this is 280 * clock and enter dyntick-idle mode. One way to accomplish this is
281 * to drain all RCU callbacks from this CPU, and the other is to have 281 * to drain all RCU callbacks from this CPU, and the other is to have
282 * done everything RCU requires for the current grace period. In this 282 * done everything RCU requires for the current grace period. In this
283 * latter case, the CPU will be awakened at the end of the current grace 283 * latter case, the CPU will be awakened at the end of the current grace
284 * period in order to process the remainder of its callbacks. 284 * period in order to process the remainder of its callbacks.
285 * 285 *
286 * These tracepoints take a string as argument: 286 * These tracepoints take a string as argument:
287 * 287 *
288 * "No callbacks": Nothing to do, no callbacks on this CPU. 288 * "No callbacks": Nothing to do, no callbacks on this CPU.
289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt. 289 * "In holdoff": Nothing to do, holding off after unsuccessful attempt.
290 * "Begin holdoff": Attempt failed, don't retry until next jiffy. 290 * "Begin holdoff": Attempt failed, don't retry until next jiffy.
291 * "Dyntick with callbacks": Entering dyntick-idle despite callbacks.
291 * "More callbacks": Still more callbacks, try again to clear them out. 292 * "More callbacks": Still more callbacks, try again to clear them out.
292 * "Callbacks drained": All callbacks processed, off to dyntick idle! 293 * "Callbacks drained": All callbacks processed, off to dyntick idle!
293 * "CPU awakened at GP end": 294 * "Timer": Timer fired to cause CPU to continue processing callbacks.
294 */ 295 */
295 TRACE_EVENT(rcu_prep_idle, 296 TRACE_EVENT(rcu_prep_idle,
296 297
297 TP_PROTO(char *reason), 298 TP_PROTO(char *reason),
298 299
299 TP_ARGS(reason), 300 TP_ARGS(reason),
300 301
301 TP_STRUCT__entry( 302 TP_STRUCT__entry(
302 __field(char *, reason) 303 __field(char *, reason)
303 ), 304 ),
304 305
305 TP_fast_assign( 306 TP_fast_assign(
306 __entry->reason = reason; 307 __entry->reason = reason;
307 ), 308 ),
308 309
309 TP_printk("%s", __entry->reason) 310 TP_printk("%s", __entry->reason)
310 ); 311 );
311 312
312 /* 313 /*
313 * Tracepoint for the registration of a single RCU callback function. 314 * Tracepoint for the registration of a single RCU callback function.
314 * The first argument is the type of RCU, the second argument is 315 * The first argument is the type of RCU, the second argument is
315 * a pointer to the RCU callback itself, and the third element is the 316 * a pointer to the RCU callback itself, and the third element is the
316 * new RCU callback queue length for the current CPU. 317 * new RCU callback queue length for the current CPU.
317 */ 318 */
318 TRACE_EVENT(rcu_callback, 319 TRACE_EVENT(rcu_callback,
319 320
320 TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), 321 TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
321 322
322 TP_ARGS(rcuname, rhp, qlen), 323 TP_ARGS(rcuname, rhp, qlen),
323 324
324 TP_STRUCT__entry( 325 TP_STRUCT__entry(
325 __field(char *, rcuname) 326 __field(char *, rcuname)
326 __field(void *, rhp) 327 __field(void *, rhp)
327 __field(void *, func) 328 __field(void *, func)
328 __field(long, qlen) 329 __field(long, qlen)
329 ), 330 ),
330 331
331 TP_fast_assign( 332 TP_fast_assign(
332 __entry->rcuname = rcuname; 333 __entry->rcuname = rcuname;
333 __entry->rhp = rhp; 334 __entry->rhp = rhp;
334 __entry->func = rhp->func; 335 __entry->func = rhp->func;
335 __entry->qlen = qlen; 336 __entry->qlen = qlen;
336 ), 337 ),
337 338
338 TP_printk("%s rhp=%p func=%pf %ld", 339 TP_printk("%s rhp=%p func=%pf %ld",
339 __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) 340 __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
340 ); 341 );
341 342
342 /* 343 /*
343 * Tracepoint for the registration of a single RCU callback of the special 344 * Tracepoint for the registration of a single RCU callback of the special
344 * kfree() form. The first argument is the RCU type, the second argument 345 * kfree() form. The first argument is the RCU type, the second argument
345 * is a pointer to the RCU callback, the third argument is the offset 346 * is a pointer to the RCU callback, the third argument is the offset
346 * of the callback within the enclosing RCU-protected data structure, 347 * of the callback within the enclosing RCU-protected data structure,
347 * and the fourth argument is the new RCU callback queue length for the 348 * and the fourth argument is the new RCU callback queue length for the
348 * current CPU. 349 * current CPU.
349 */ 350 */
350 TRACE_EVENT(rcu_kfree_callback, 351 TRACE_EVENT(rcu_kfree_callback,
351 352
352 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, 353 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
353 long qlen), 354 long qlen),
354 355
355 TP_ARGS(rcuname, rhp, offset, qlen), 356 TP_ARGS(rcuname, rhp, offset, qlen),
356 357
357 TP_STRUCT__entry( 358 TP_STRUCT__entry(
358 __field(char *, rcuname) 359 __field(char *, rcuname)
359 __field(void *, rhp) 360 __field(void *, rhp)
360 __field(unsigned long, offset) 361 __field(unsigned long, offset)
361 __field(long, qlen) 362 __field(long, qlen)
362 ), 363 ),
363 364
364 TP_fast_assign( 365 TP_fast_assign(
365 __entry->rcuname = rcuname; 366 __entry->rcuname = rcuname;
366 __entry->rhp = rhp; 367 __entry->rhp = rhp;
367 __entry->offset = offset; 368 __entry->offset = offset;
368 __entry->qlen = qlen; 369 __entry->qlen = qlen;
369 ), 370 ),
370 371
371 TP_printk("%s rhp=%p func=%ld %ld", 372 TP_printk("%s rhp=%p func=%ld %ld",
372 __entry->rcuname, __entry->rhp, __entry->offset, 373 __entry->rcuname, __entry->rhp, __entry->offset,
373 __entry->qlen) 374 __entry->qlen)
374 ); 375 );
375 376
376 /* 377 /*
377 * Tracepoint for marking the beginning rcu_do_batch, performed to start 378 * Tracepoint for marking the beginning rcu_do_batch, performed to start
378 * RCU callback invocation. The first argument is the RCU flavor, 379 * RCU callback invocation. The first argument is the RCU flavor,
379 * the second is the total number of callbacks (including those that 380 * the second is the total number of callbacks (including those that
380 * are not yet ready to be invoked), and the third argument is the 381 * are not yet ready to be invoked), and the third argument is the
381 * current RCU-callback batch limit. 382 * current RCU-callback batch limit.
382 */ 383 */
383 TRACE_EVENT(rcu_batch_start, 384 TRACE_EVENT(rcu_batch_start,
384 385
385 TP_PROTO(char *rcuname, long qlen, int blimit), 386 TP_PROTO(char *rcuname, long qlen, int blimit),
386 387
387 TP_ARGS(rcuname, qlen, blimit), 388 TP_ARGS(rcuname, qlen, blimit),
388 389
389 TP_STRUCT__entry( 390 TP_STRUCT__entry(
390 __field(char *, rcuname) 391 __field(char *, rcuname)
391 __field(long, qlen) 392 __field(long, qlen)
392 __field(int, blimit) 393 __field(int, blimit)
393 ), 394 ),
394 395
395 TP_fast_assign( 396 TP_fast_assign(
396 __entry->rcuname = rcuname; 397 __entry->rcuname = rcuname;
397 __entry->qlen = qlen; 398 __entry->qlen = qlen;
398 __entry->blimit = blimit; 399 __entry->blimit = blimit;
399 ), 400 ),
400 401
401 TP_printk("%s CBs=%ld bl=%d", 402 TP_printk("%s CBs=%ld bl=%d",
402 __entry->rcuname, __entry->qlen, __entry->blimit) 403 __entry->rcuname, __entry->qlen, __entry->blimit)
403 ); 404 );
404 405
405 /* 406 /*
406 * Tracepoint for the invocation of a single RCU callback function. 407 * Tracepoint for the invocation of a single RCU callback function.
407 * The first argument is the type of RCU, and the second argument is 408 * The first argument is the type of RCU, and the second argument is
408 * a pointer to the RCU callback itself. 409 * a pointer to the RCU callback itself.
409 */ 410 */
410 TRACE_EVENT(rcu_invoke_callback, 411 TRACE_EVENT(rcu_invoke_callback,
411 412
412 TP_PROTO(char *rcuname, struct rcu_head *rhp), 413 TP_PROTO(char *rcuname, struct rcu_head *rhp),
413 414
414 TP_ARGS(rcuname, rhp), 415 TP_ARGS(rcuname, rhp),
415 416
416 TP_STRUCT__entry( 417 TP_STRUCT__entry(
417 __field(char *, rcuname) 418 __field(char *, rcuname)
418 __field(void *, rhp) 419 __field(void *, rhp)
419 __field(void *, func) 420 __field(void *, func)
420 ), 421 ),
421 422
422 TP_fast_assign( 423 TP_fast_assign(
423 __entry->rcuname = rcuname; 424 __entry->rcuname = rcuname;
424 __entry->rhp = rhp; 425 __entry->rhp = rhp;
425 __entry->func = rhp->func; 426 __entry->func = rhp->func;
426 ), 427 ),
427 428
428 TP_printk("%s rhp=%p func=%pf", 429 TP_printk("%s rhp=%p func=%pf",
429 __entry->rcuname, __entry->rhp, __entry->func) 430 __entry->rcuname, __entry->rhp, __entry->func)
430 ); 431 );
431 432
432 /* 433 /*
433 * Tracepoint for the invocation of a single RCU callback of the special 434 * Tracepoint for the invocation of a single RCU callback of the special
434 * kfree() form. The first argument is the RCU flavor, the second 435 * kfree() form. The first argument is the RCU flavor, the second
435 * argument is a pointer to the RCU callback, and the third argument 436 * argument is a pointer to the RCU callback, and the third argument
436 * is the offset of the callback within the enclosing RCU-protected 437 * is the offset of the callback within the enclosing RCU-protected
437 * data structure. 438 * data structure.
438 */ 439 */
439 TRACE_EVENT(rcu_invoke_kfree_callback, 440 TRACE_EVENT(rcu_invoke_kfree_callback,
440 441
441 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), 442 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset),
442 443
443 TP_ARGS(rcuname, rhp, offset), 444 TP_ARGS(rcuname, rhp, offset),
444 445
445 TP_STRUCT__entry( 446 TP_STRUCT__entry(
446 __field(char *, rcuname) 447 __field(char *, rcuname)
447 __field(void *, rhp) 448 __field(void *, rhp)
448 __field(unsigned long, offset) 449 __field(unsigned long, offset)
449 ), 450 ),
450 451
451 TP_fast_assign( 452 TP_fast_assign(
452 __entry->rcuname = rcuname; 453 __entry->rcuname = rcuname;
453 __entry->rhp = rhp; 454 __entry->rhp = rhp;
454 __entry->offset = offset; 455 __entry->offset = offset;
455 ), 456 ),
456 457
457 TP_printk("%s rhp=%p func=%ld", 458 TP_printk("%s rhp=%p func=%ld",
458 __entry->rcuname, __entry->rhp, __entry->offset) 459 __entry->rcuname, __entry->rhp, __entry->offset)
459 ); 460 );
460 461
461 /* 462 /*
462 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been 463 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been
463 * invoked. The first argument is the name of the RCU flavor and 464 * invoked. The first argument is the name of the RCU flavor and
464 * the second argument is number of callbacks actually invoked. 465 * the second argument is number of callbacks actually invoked.
465 */ 466 */
466 TRACE_EVENT(rcu_batch_end, 467 TRACE_EVENT(rcu_batch_end,
467 468
468 TP_PROTO(char *rcuname, int callbacks_invoked), 469 TP_PROTO(char *rcuname, int callbacks_invoked),
469 470
470 TP_ARGS(rcuname, callbacks_invoked), 471 TP_ARGS(rcuname, callbacks_invoked),
471 472
472 TP_STRUCT__entry( 473 TP_STRUCT__entry(
473 __field(char *, rcuname) 474 __field(char *, rcuname)
474 __field(int, callbacks_invoked) 475 __field(int, callbacks_invoked)
475 ), 476 ),
476 477
477 TP_fast_assign( 478 TP_fast_assign(
478 __entry->rcuname = rcuname; 479 __entry->rcuname = rcuname;
479 __entry->callbacks_invoked = callbacks_invoked; 480 __entry->callbacks_invoked = callbacks_invoked;
480 ), 481 ),
481 482
482 TP_printk("%s CBs-invoked=%d", 483 TP_printk("%s CBs-invoked=%d",
483 __entry->rcuname, __entry->callbacks_invoked) 484 __entry->rcuname, __entry->callbacks_invoked)
484 ); 485 );
485 486
486 /* 487 /*
487 * Tracepoint for rcutorture readers. The first argument is the name 488 * Tracepoint for rcutorture readers. The first argument is the name
488 * of the RCU flavor from rcutorture's viewpoint and the second argument 489 * of the RCU flavor from rcutorture's viewpoint and the second argument
489 * is the callback address. 490 * is the callback address.
490 */ 491 */
491 TRACE_EVENT(rcu_torture_read, 492 TRACE_EVENT(rcu_torture_read,
492 493
493 TP_PROTO(char *rcutorturename, struct rcu_head *rhp), 494 TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
494 495
495 TP_ARGS(rcutorturename, rhp), 496 TP_ARGS(rcutorturename, rhp),
496 497
497 TP_STRUCT__entry( 498 TP_STRUCT__entry(
498 __field(char *, rcutorturename) 499 __field(char *, rcutorturename)
499 __field(struct rcu_head *, rhp) 500 __field(struct rcu_head *, rhp)
500 ), 501 ),
501 502
502 TP_fast_assign( 503 TP_fast_assign(
503 __entry->rcutorturename = rcutorturename; 504 __entry->rcutorturename = rcutorturename;
504 __entry->rhp = rhp; 505 __entry->rhp = rhp;
505 ), 506 ),
506 507
507 TP_printk("%s torture read %p", 508 TP_printk("%s torture read %p",
508 __entry->rcutorturename, __entry->rhp) 509 __entry->rcutorturename, __entry->rhp)
509 ); 510 );
510 511
511 #else /* #ifdef CONFIG_RCU_TRACE */ 512 #else /* #ifdef CONFIG_RCU_TRACE */
512 513
513 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) 514 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
514 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) 515 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
515 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) 516 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
516 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) 517 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
517 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) 518 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
518 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) 519 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
519 #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) 520 #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
520 #define trace_rcu_prep_idle(reason) do { } while (0) 521 #define trace_rcu_prep_idle(reason) do { } while (0)
521 #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) 522 #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
522 #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) 523 #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
523 #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) 524 #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
524 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) 525 #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
525 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) 526 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
526 #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) 527 #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0)
527 #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) 528 #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
528 529
529 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 530 #endif /* #else #ifdef CONFIG_RCU_TRACE */
530 531
531 #endif /* _TRACE_RCU_H */ 532 #endif /* _TRACE_RCU_H */
532 533
533 /* This part must be outside protection */ 534 /* This part must be outside protection */
534 #include <trace/define_trace.h> 535 #include <trace/define_trace.h>
535 536
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
23 * 23 *
24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 24 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 25 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
26 * 26 *
27 * For detailed explanation of Read-Copy Update mechanism see - 27 * For detailed explanation of Read-Copy Update mechanism see -
28 * Documentation/RCU 28 * Documentation/RCU
29 */ 29 */
30 #include <linux/types.h> 30 #include <linux/types.h>
31 #include <linux/kernel.h> 31 #include <linux/kernel.h>
32 #include <linux/init.h> 32 #include <linux/init.h>
33 #include <linux/spinlock.h> 33 #include <linux/spinlock.h>
34 #include <linux/smp.h> 34 #include <linux/smp.h>
35 #include <linux/rcupdate.h> 35 #include <linux/rcupdate.h>
36 #include <linux/interrupt.h> 36 #include <linux/interrupt.h>
37 #include <linux/sched.h> 37 #include <linux/sched.h>
38 #include <linux/nmi.h> 38 #include <linux/nmi.h>
39 #include <linux/atomic.h> 39 #include <linux/atomic.h>
40 #include <linux/bitops.h> 40 #include <linux/bitops.h>
41 #include <linux/export.h> 41 #include <linux/export.h>
42 #include <linux/completion.h> 42 #include <linux/completion.h>
43 #include <linux/moduleparam.h> 43 #include <linux/moduleparam.h>
44 #include <linux/percpu.h> 44 #include <linux/percpu.h>
45 #include <linux/notifier.h> 45 #include <linux/notifier.h>
46 #include <linux/cpu.h> 46 #include <linux/cpu.h>
47 #include <linux/mutex.h> 47 #include <linux/mutex.h>
48 #include <linux/time.h> 48 #include <linux/time.h>
49 #include <linux/kernel_stat.h> 49 #include <linux/kernel_stat.h>
50 #include <linux/wait.h> 50 #include <linux/wait.h>
51 #include <linux/kthread.h> 51 #include <linux/kthread.h>
52 #include <linux/prefetch.h> 52 #include <linux/prefetch.h>
53 53
54 #include "rcutree.h" 54 #include "rcutree.h"
55 #include <trace/events/rcu.h> 55 #include <trace/events/rcu.h>
56 56
57 #include "rcu.h" 57 #include "rcu.h"
58 58
59 /* Data structures. */ 59 /* Data structures. */
60 60
61 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 61 static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
62 62
63 #define RCU_STATE_INITIALIZER(structname) { \ 63 #define RCU_STATE_INITIALIZER(structname) { \
64 .level = { &structname##_state.node[0] }, \ 64 .level = { &structname##_state.node[0] }, \
65 .levelcnt = { \ 65 .levelcnt = { \
66 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 66 NUM_RCU_LVL_0, /* root of hierarchy. */ \
67 NUM_RCU_LVL_1, \ 67 NUM_RCU_LVL_1, \
68 NUM_RCU_LVL_2, \ 68 NUM_RCU_LVL_2, \
69 NUM_RCU_LVL_3, \ 69 NUM_RCU_LVL_3, \
70 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ 70 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
71 }, \ 71 }, \
72 .fqs_state = RCU_GP_IDLE, \ 72 .fqs_state = RCU_GP_IDLE, \
73 .gpnum = -300, \ 73 .gpnum = -300, \
74 .completed = -300, \ 74 .completed = -300, \
75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ 75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
76 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ 76 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
77 .n_force_qs = 0, \ 77 .n_force_qs = 0, \
78 .n_force_qs_ngp = 0, \ 78 .n_force_qs_ngp = 0, \
79 .name = #structname, \ 79 .name = #structname, \
80 } 80 }
81 81
82 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); 82 struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
83 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 83 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
84 84
85 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); 85 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
86 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 86 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
87 87
88 static struct rcu_state *rcu_state; 88 static struct rcu_state *rcu_state;
89 89
90 /* 90 /*
91 * The rcu_scheduler_active variable transitions from zero to one just 91 * The rcu_scheduler_active variable transitions from zero to one just
92 * before the first task is spawned. So when this variable is zero, RCU 92 * before the first task is spawned. So when this variable is zero, RCU
93 * can assume that there is but one task, allowing RCU to (for example) 93 * can assume that there is but one task, allowing RCU to (for example)
94 * optimized synchronize_sched() to a simple barrier(). When this variable 94 * optimized synchronize_sched() to a simple barrier(). When this variable
95 * is one, RCU must actually do all the hard work required to detect real 95 * is one, RCU must actually do all the hard work required to detect real
96 * grace periods. This variable is also used to suppress boot-time false 96 * grace periods. This variable is also used to suppress boot-time false
97 * positives from lockdep-RCU error checking. 97 * positives from lockdep-RCU error checking.
98 */ 98 */
99 int rcu_scheduler_active __read_mostly; 99 int rcu_scheduler_active __read_mostly;
100 EXPORT_SYMBOL_GPL(rcu_scheduler_active); 100 EXPORT_SYMBOL_GPL(rcu_scheduler_active);
101 101
102 /* 102 /*
103 * The rcu_scheduler_fully_active variable transitions from zero to one 103 * The rcu_scheduler_fully_active variable transitions from zero to one
104 * during the early_initcall() processing, which is after the scheduler 104 * during the early_initcall() processing, which is after the scheduler
105 * is capable of creating new tasks. So RCU processing (for example, 105 * is capable of creating new tasks. So RCU processing (for example,
106 * creating tasks for RCU priority boosting) must be delayed until after 106 * creating tasks for RCU priority boosting) must be delayed until after
107 * rcu_scheduler_fully_active transitions from zero to one. We also 107 * rcu_scheduler_fully_active transitions from zero to one. We also
108 * currently delay invocation of any RCU callbacks until after this point. 108 * currently delay invocation of any RCU callbacks until after this point.
109 * 109 *
110 * It might later prove better for people registering RCU callbacks during 110 * It might later prove better for people registering RCU callbacks during
111 * early boot to take responsibility for these callbacks, but one step at 111 * early boot to take responsibility for these callbacks, but one step at
112 * a time. 112 * a time.
113 */ 113 */
114 static int rcu_scheduler_fully_active __read_mostly; 114 static int rcu_scheduler_fully_active __read_mostly;
115 115
116 #ifdef CONFIG_RCU_BOOST 116 #ifdef CONFIG_RCU_BOOST
117 117
118 /* 118 /*
119 * Control variables for per-CPU and per-rcu_node kthreads. These 119 * Control variables for per-CPU and per-rcu_node kthreads. These
120 * handle all flavors of RCU. 120 * handle all flavors of RCU.
121 */ 121 */
122 static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); 122 static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
123 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 123 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
124 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); 124 DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu);
125 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 125 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
126 DEFINE_PER_CPU(char, rcu_cpu_has_work); 126 DEFINE_PER_CPU(char, rcu_cpu_has_work);
127 127
128 #endif /* #ifdef CONFIG_RCU_BOOST */ 128 #endif /* #ifdef CONFIG_RCU_BOOST */
129 129
130 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 130 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
131 static void invoke_rcu_core(void); 131 static void invoke_rcu_core(void);
132 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 132 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
133 133
134 /* 134 /*
135 * Track the rcutorture test sequence number and the update version 135 * Track the rcutorture test sequence number and the update version
136 * number within a given test. The rcutorture_testseq is incremented 136 * number within a given test. The rcutorture_testseq is incremented
137 * on every rcutorture module load and unload, so has an odd value 137 * on every rcutorture module load and unload, so has an odd value
138 * when a test is running. The rcutorture_vernum is set to zero 138 * when a test is running. The rcutorture_vernum is set to zero
139 * when rcutorture starts and is incremented on each rcutorture update. 139 * when rcutorture starts and is incremented on each rcutorture update.
140 * These variables enable correlating rcutorture output with the 140 * These variables enable correlating rcutorture output with the
141 * RCU tracing information. 141 * RCU tracing information.
142 */ 142 */
143 unsigned long rcutorture_testseq; 143 unsigned long rcutorture_testseq;
144 unsigned long rcutorture_vernum; 144 unsigned long rcutorture_vernum;
145 145
146 /* 146 /*
147 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 147 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
148 * permit this function to be invoked without holding the root rcu_node 148 * permit this function to be invoked without holding the root rcu_node
149 * structure's ->lock, but of course results can be subject to change. 149 * structure's ->lock, but of course results can be subject to change.
150 */ 150 */
151 static int rcu_gp_in_progress(struct rcu_state *rsp) 151 static int rcu_gp_in_progress(struct rcu_state *rsp)
152 { 152 {
153 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); 153 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
154 } 154 }
155 155
156 /* 156 /*
157 * Note a quiescent state. Because we do not need to know 157 * Note a quiescent state. Because we do not need to know
158 * how many quiescent states passed, just if there was at least 158 * how many quiescent states passed, just if there was at least
159 * one since the start of the grace period, this just sets a flag. 159 * one since the start of the grace period, this just sets a flag.
160 * The caller must have disabled preemption. 160 * The caller must have disabled preemption.
161 */ 161 */
162 void rcu_sched_qs(int cpu) 162 void rcu_sched_qs(int cpu)
163 { 163 {
164 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 164 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
165 165
166 rdp->passed_quiesce_gpnum = rdp->gpnum; 166 rdp->passed_quiesce_gpnum = rdp->gpnum;
167 barrier(); 167 barrier();
168 if (rdp->passed_quiesce == 0) 168 if (rdp->passed_quiesce == 0)
169 trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); 169 trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
170 rdp->passed_quiesce = 1; 170 rdp->passed_quiesce = 1;
171 } 171 }
172 172
173 void rcu_bh_qs(int cpu) 173 void rcu_bh_qs(int cpu)
174 { 174 {
175 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 175 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
176 176
177 rdp->passed_quiesce_gpnum = rdp->gpnum; 177 rdp->passed_quiesce_gpnum = rdp->gpnum;
178 barrier(); 178 barrier();
179 if (rdp->passed_quiesce == 0) 179 if (rdp->passed_quiesce == 0)
180 trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); 180 trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
181 rdp->passed_quiesce = 1; 181 rdp->passed_quiesce = 1;
182 } 182 }
183 183
184 /* 184 /*
185 * Note a context switch. This is a quiescent state for RCU-sched, 185 * Note a context switch. This is a quiescent state for RCU-sched,
186 * and requires special handling for preemptible RCU. 186 * and requires special handling for preemptible RCU.
187 * The caller must have disabled preemption. 187 * The caller must have disabled preemption.
188 */ 188 */
189 void rcu_note_context_switch(int cpu) 189 void rcu_note_context_switch(int cpu)
190 { 190 {
191 trace_rcu_utilization("Start context switch"); 191 trace_rcu_utilization("Start context switch");
192 rcu_sched_qs(cpu); 192 rcu_sched_qs(cpu);
193 rcu_preempt_note_context_switch(cpu); 193 rcu_preempt_note_context_switch(cpu);
194 trace_rcu_utilization("End context switch"); 194 trace_rcu_utilization("End context switch");
195 } 195 }
196 EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
197 197
198 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 198 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
199 .dynticks_nesting = DYNTICK_TASK_NESTING, 199 .dynticks_nesting = DYNTICK_TASK_NESTING,
200 .dynticks = ATOMIC_INIT(1), 200 .dynticks = ATOMIC_INIT(1),
201 }; 201 };
202 202
203 static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 203 static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
204 static int qhimark = 10000; /* If this many pending, ignore blimit. */ 204 static int qhimark = 10000; /* If this many pending, ignore blimit. */
205 static int qlowmark = 100; /* Once only this many pending, use blimit. */ 205 static int qlowmark = 100; /* Once only this many pending, use blimit. */
206 206
207 module_param(blimit, int, 0); 207 module_param(blimit, int, 0);
208 module_param(qhimark, int, 0); 208 module_param(qhimark, int, 0);
209 module_param(qlowmark, int, 0); 209 module_param(qlowmark, int, 0);
210 210
211 int rcu_cpu_stall_suppress __read_mostly; 211 int rcu_cpu_stall_suppress __read_mostly;
212 module_param(rcu_cpu_stall_suppress, int, 0644); 212 module_param(rcu_cpu_stall_suppress, int, 0644);
213 213
214 static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 214 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
215 static int rcu_pending(int cpu); 215 static int rcu_pending(int cpu);
216 216
217 /* 217 /*
218 * Return the number of RCU-sched batches processed thus far for debug & stats. 218 * Return the number of RCU-sched batches processed thus far for debug & stats.
219 */ 219 */
220 long rcu_batches_completed_sched(void) 220 long rcu_batches_completed_sched(void)
221 { 221 {
222 return rcu_sched_state.completed; 222 return rcu_sched_state.completed;
223 } 223 }
224 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); 224 EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
225 225
226 /* 226 /*
227 * Return the number of RCU BH batches processed thus far for debug & stats. 227 * Return the number of RCU BH batches processed thus far for debug & stats.
228 */ 228 */
229 long rcu_batches_completed_bh(void) 229 long rcu_batches_completed_bh(void)
230 { 230 {
231 return rcu_bh_state.completed; 231 return rcu_bh_state.completed;
232 } 232 }
233 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 233 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
234 234
235 /* 235 /*
236 * Force a quiescent state for RCU BH. 236 * Force a quiescent state for RCU BH.
237 */ 237 */
238 void rcu_bh_force_quiescent_state(void) 238 void rcu_bh_force_quiescent_state(void)
239 { 239 {
240 force_quiescent_state(&rcu_bh_state, 0); 240 force_quiescent_state(&rcu_bh_state, 0);
241 } 241 }
242 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); 242 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
243 243
244 /* 244 /*
245 * Record the number of times rcutorture tests have been initiated and 245 * Record the number of times rcutorture tests have been initiated and
246 * terminated. This information allows the debugfs tracing stats to be 246 * terminated. This information allows the debugfs tracing stats to be
247 * correlated to the rcutorture messages, even when the rcutorture module 247 * correlated to the rcutorture messages, even when the rcutorture module
248 * is being repeatedly loaded and unloaded. In other words, we cannot 248 * is being repeatedly loaded and unloaded. In other words, we cannot
249 * store this state in rcutorture itself. 249 * store this state in rcutorture itself.
250 */ 250 */
251 void rcutorture_record_test_transition(void) 251 void rcutorture_record_test_transition(void)
252 { 252 {
253 rcutorture_testseq++; 253 rcutorture_testseq++;
254 rcutorture_vernum = 0; 254 rcutorture_vernum = 0;
255 } 255 }
256 EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); 256 EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
257 257
258 /* 258 /*
259 * Record the number of writer passes through the current rcutorture test. 259 * Record the number of writer passes through the current rcutorture test.
260 * This is also used to correlate debugfs tracing stats with the rcutorture 260 * This is also used to correlate debugfs tracing stats with the rcutorture
261 * messages. 261 * messages.
262 */ 262 */
263 void rcutorture_record_progress(unsigned long vernum) 263 void rcutorture_record_progress(unsigned long vernum)
264 { 264 {
265 rcutorture_vernum++; 265 rcutorture_vernum++;
266 } 266 }
267 EXPORT_SYMBOL_GPL(rcutorture_record_progress); 267 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
268 268
269 /* 269 /*
270 * Force a quiescent state for RCU-sched. 270 * Force a quiescent state for RCU-sched.
271 */ 271 */
272 void rcu_sched_force_quiescent_state(void) 272 void rcu_sched_force_quiescent_state(void)
273 { 273 {
274 force_quiescent_state(&rcu_sched_state, 0); 274 force_quiescent_state(&rcu_sched_state, 0);
275 } 275 }
276 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); 276 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
277 277
278 /* 278 /*
279 * Does the CPU have callbacks ready to be invoked? 279 * Does the CPU have callbacks ready to be invoked?
280 */ 280 */
281 static int 281 static int
282 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) 282 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
283 { 283 {
284 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; 284 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
285 } 285 }
286 286
287 /* 287 /*
288 * Does the current CPU require a yet-as-unscheduled grace period? 288 * Does the current CPU require a yet-as-unscheduled grace period?
289 */ 289 */
290 static int 290 static int
291 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 291 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
292 { 292 {
293 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); 293 return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp);
294 } 294 }
295 295
296 /* 296 /*
297 * Return the root node of the specified rcu_state structure. 297 * Return the root node of the specified rcu_state structure.
298 */ 298 */
299 static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 299 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
300 { 300 {
301 return &rsp->node[0]; 301 return &rsp->node[0];
302 } 302 }
303 303
304 #ifdef CONFIG_SMP 304 #ifdef CONFIG_SMP
305 305
306 /* 306 /*
307 * If the specified CPU is offline, tell the caller that it is in 307 * If the specified CPU is offline, tell the caller that it is in
308 * a quiescent state. Otherwise, whack it with a reschedule IPI. 308 * a quiescent state. Otherwise, whack it with a reschedule IPI.
309 * Grace periods can end up waiting on an offline CPU when that 309 * Grace periods can end up waiting on an offline CPU when that
310 * CPU is in the process of coming online -- it will be added to the 310 * CPU is in the process of coming online -- it will be added to the
311 * rcu_node bitmasks before it actually makes it online. The same thing 311 * rcu_node bitmasks before it actually makes it online. The same thing
312 * can happen while a CPU is in the process of coming online. Because this 312 * can happen while a CPU is in the process of coming online. Because this
313 * race is quite rare, we check for it after detecting that the grace 313 * race is quite rare, we check for it after detecting that the grace
314 * period has been delayed rather than checking each and every CPU 314 * period has been delayed rather than checking each and every CPU
315 * each and every time we start a new grace period. 315 * each and every time we start a new grace period.
316 */ 316 */
317 static int rcu_implicit_offline_qs(struct rcu_data *rdp) 317 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
318 { 318 {
319 /* 319 /*
320 * If the CPU is offline, it is in a quiescent state. We can 320 * If the CPU is offline, it is in a quiescent state. We can
321 * trust its state not to change because interrupts are disabled. 321 * trust its state not to change because interrupts are disabled.
322 */ 322 */
323 if (cpu_is_offline(rdp->cpu)) { 323 if (cpu_is_offline(rdp->cpu)) {
324 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); 324 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
325 rdp->offline_fqs++; 325 rdp->offline_fqs++;
326 return 1; 326 return 1;
327 } 327 }
328 328
329 /* 329 /*
330 * The CPU is online, so send it a reschedule IPI. This forces 330 * The CPU is online, so send it a reschedule IPI. This forces
331 * it through the scheduler, and (inefficiently) also handles cases 331 * it through the scheduler, and (inefficiently) also handles cases
332 * where idle loops fail to inform RCU about the CPU being idle. 332 * where idle loops fail to inform RCU about the CPU being idle.
333 */ 333 */
334 if (rdp->cpu != smp_processor_id()) 334 if (rdp->cpu != smp_processor_id())
335 smp_send_reschedule(rdp->cpu); 335 smp_send_reschedule(rdp->cpu);
336 else 336 else
337 set_need_resched(); 337 set_need_resched();
338 rdp->resched_ipi++; 338 rdp->resched_ipi++;
339 return 0; 339 return 0;
340 } 340 }
341 341
342 #endif /* #ifdef CONFIG_SMP */ 342 #endif /* #ifdef CONFIG_SMP */
343 343
344 /* 344 /*
345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle 345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
346 * 346 *
347 * If the new value of the ->dynticks_nesting counter now is zero, 347 * If the new value of the ->dynticks_nesting counter now is zero,
348 * we really have entered idle, and must do the appropriate accounting. 348 * we really have entered idle, and must do the appropriate accounting.
349 * The caller must have disabled interrupts. 349 * The caller must have disabled interrupts.
350 */ 350 */
351 static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) 351 static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
352 { 352 {
353 if (rdtp->dynticks_nesting) { 353 if (rdtp->dynticks_nesting) {
354 trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); 354 trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
355 return; 355 return;
356 } 356 }
357 trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); 357 trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting);
358 if (!is_idle_task(current)) { 358 if (!is_idle_task(current)) {
359 struct task_struct *idle = idle_task(smp_processor_id()); 359 struct task_struct *idle = idle_task(smp_processor_id());
360 360
361 trace_rcu_dyntick("Error on entry: not idle task", 361 trace_rcu_dyntick("Error on entry: not idle task",
362 oldval, rdtp->dynticks_nesting); 362 oldval, rdtp->dynticks_nesting);
363 ftrace_dump(DUMP_ALL); 363 ftrace_dump(DUMP_ALL);
364 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 364 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
365 current->pid, current->comm, 365 current->pid, current->comm,
366 idle->pid, idle->comm); /* must be idle task! */ 366 idle->pid, idle->comm); /* must be idle task! */
367 } 367 }
368 rcu_prepare_for_idle(smp_processor_id()); 368 rcu_prepare_for_idle(smp_processor_id());
369 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 369 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
370 smp_mb__before_atomic_inc(); /* See above. */ 370 smp_mb__before_atomic_inc(); /* See above. */
371 atomic_inc(&rdtp->dynticks); 371 atomic_inc(&rdtp->dynticks);
372 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 372 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
373 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 373 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
374 } 374 }
375 375
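rcu_idle_enter_common() relies on the parity of ->dynticks: the counter is incremented exactly once per transition, so an even value means the CPU is in dyntick-idle (an extended quiescent state) and an odd value means it is not, which is what the WARN_ON_ONCE() above checks. A minimal user-space sketch of that convention follows, using C11 atomics in place of the kernel's atomic_t and smp_mb__before/after_atomic_inc() barriers; the function names are made up for the example.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint dynticks = 1;	/* start odd: not idle */

static void idle_enter(void)
{
	/* acq_rel ordering stands in for the kernel's explicit barriers. */
	unsigned int v = atomic_fetch_add_explicit(&dynticks, 1,
						   memory_order_acq_rel) + 1;
	if (v & 0x1)
		printf("BUG: counter odd after idle entry\n");
}

static void idle_exit(void)
{
	unsigned int v = atomic_fetch_add_explicit(&dynticks, 1,
						   memory_order_acq_rel) + 1;
	if (!(v & 0x1))
		printf("BUG: counter even after idle exit\n");
}

static int cpu_looks_idle(void)
{
	return (atomic_load(&dynticks) & 0x1) == 0;
}

int main(void)
{
	printf("idle? %d\n", cpu_looks_idle());	/* 0: odd, running */
	idle_enter();
	printf("idle? %d\n", cpu_looks_idle());	/* 1: even, idle   */
	idle_exit();
	printf("idle? %d\n", cpu_looks_idle());	/* 0: odd again    */
	return 0;
}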
376 /** 376 /**
377 * rcu_idle_enter - inform RCU that current CPU is entering idle 377 * rcu_idle_enter - inform RCU that current CPU is entering idle
378 * 378 *
379 * Enter idle mode, in other words, -leave- the mode in which RCU 379 * Enter idle mode, in other words, -leave- the mode in which RCU
380 * read-side critical sections can occur. (Though RCU read-side 380 * read-side critical sections can occur. (Though RCU read-side
381 * critical sections can occur in irq handlers in idle, a possibility 381 * critical sections can occur in irq handlers in idle, a possibility
382 * handled by irq_enter() and irq_exit().) 382 * handled by irq_enter() and irq_exit().)
383 * 383 *
384 * We crowbar the ->dynticks_nesting field to zero to allow for 384 * We crowbar the ->dynticks_nesting field to zero to allow for
385 * the possibility of usermode upcalls having messed up our count 385 * the possibility of usermode upcalls having messed up our count
386 * of interrupt nesting level during the prior busy period. 386 * of interrupt nesting level during the prior busy period.
387 */ 387 */
388 void rcu_idle_enter(void) 388 void rcu_idle_enter(void)
389 { 389 {
390 unsigned long flags; 390 unsigned long flags;
391 long long oldval; 391 long long oldval;
392 struct rcu_dynticks *rdtp; 392 struct rcu_dynticks *rdtp;
393 393
394 local_irq_save(flags); 394 local_irq_save(flags);
395 rdtp = &__get_cpu_var(rcu_dynticks); 395 rdtp = &__get_cpu_var(rcu_dynticks);
396 oldval = rdtp->dynticks_nesting; 396 oldval = rdtp->dynticks_nesting;
397 rdtp->dynticks_nesting = 0; 397 rdtp->dynticks_nesting = 0;
398 rcu_idle_enter_common(rdtp, oldval); 398 rcu_idle_enter_common(rdtp, oldval);
399 local_irq_restore(flags); 399 local_irq_restore(flags);
400 } 400 }
401 401
402 /** 402 /**
403 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle 403 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
404 * 404 *
405 * Exit from an interrupt handler, which might possibly result in entering 405 * Exit from an interrupt handler, which might possibly result in entering
406 * idle mode, in other words, leaving the mode in which read-side critical 406 * idle mode, in other words, leaving the mode in which read-side critical
407 * sections can occur. 407 * sections can occur.
408 * 408 *
409 * This code assumes that the idle loop never does anything that might 409 * This code assumes that the idle loop never does anything that might
410 * result in unbalanced calls to irq_enter() and irq_exit(). If your 410 * result in unbalanced calls to irq_enter() and irq_exit(). If your
411 * architecture violates this assumption, RCU will give you what you 411 * architecture violates this assumption, RCU will give you what you
412 * deserve, good and hard. But very infrequently and irreproducibly. 412 * deserve, good and hard. But very infrequently and irreproducibly.
413 * 413 *
414 * Use things like work queues to work around this limitation. 414 * Use things like work queues to work around this limitation.
415 * 415 *
416 * You have been warned. 416 * You have been warned.
417 */ 417 */
418 void rcu_irq_exit(void) 418 void rcu_irq_exit(void)
419 { 419 {
420 unsigned long flags; 420 unsigned long flags;
421 long long oldval; 421 long long oldval;
422 struct rcu_dynticks *rdtp; 422 struct rcu_dynticks *rdtp;
423 423
424 local_irq_save(flags); 424 local_irq_save(flags);
425 rdtp = &__get_cpu_var(rcu_dynticks); 425 rdtp = &__get_cpu_var(rcu_dynticks);
426 oldval = rdtp->dynticks_nesting; 426 oldval = rdtp->dynticks_nesting;
427 rdtp->dynticks_nesting--; 427 rdtp->dynticks_nesting--;
428 WARN_ON_ONCE(rdtp->dynticks_nesting < 0); 428 WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
429 rcu_idle_enter_common(rdtp, oldval); 429 rcu_idle_enter_common(rdtp, oldval);
430 local_irq_restore(flags); 430 local_irq_restore(flags);
431 } 431 }
432 432
433 /* 433 /*
434 * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle 434 * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
435 * 435 *
436 * If the new value of the ->dynticks_nesting counter was previously zero, 436 * If the new value of the ->dynticks_nesting counter was previously zero,
437 * we really have exited idle, and must do the appropriate accounting. 437 * we really have exited idle, and must do the appropriate accounting.
438 * The caller must have disabled interrupts. 438 * The caller must have disabled interrupts.
439 */ 439 */
440 static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) 440 static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
441 { 441 {
442 if (oldval) { 442 if (oldval) {
443 trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); 443 trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
444 return; 444 return;
445 } 445 }
446 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 446 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
447 atomic_inc(&rdtp->dynticks); 447 atomic_inc(&rdtp->dynticks);
448 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 448 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
449 smp_mb__after_atomic_inc(); /* See above. */ 449 smp_mb__after_atomic_inc(); /* See above. */
450 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 450 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
451 rcu_cleanup_after_idle(smp_processor_id());
451 trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); 452 trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
452 if (!is_idle_task(current)) { 453 if (!is_idle_task(current)) {
453 struct task_struct *idle = idle_task(smp_processor_id()); 454 struct task_struct *idle = idle_task(smp_processor_id());
454 455
455 trace_rcu_dyntick("Error on exit: not idle task", 456 trace_rcu_dyntick("Error on exit: not idle task",
456 oldval, rdtp->dynticks_nesting); 457 oldval, rdtp->dynticks_nesting);
457 ftrace_dump(DUMP_ALL); 458 ftrace_dump(DUMP_ALL);
458 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", 459 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
459 current->pid, current->comm, 460 current->pid, current->comm,
460 idle->pid, idle->comm); /* must be idle task! */ 461 idle->pid, idle->comm); /* must be idle task! */
461 } 462 }
462 } 463 }
463 464
464 /** 465 /**
465 * rcu_idle_exit - inform RCU that current CPU is leaving idle 466 * rcu_idle_exit - inform RCU that current CPU is leaving idle
466 * 467 *
467 * Exit idle mode, in other words, -enter- the mode in which RCU 468 * Exit idle mode, in other words, -enter- the mode in which RCU
468 * read-side critical sections can occur. 469 * read-side critical sections can occur.
469 * 470 *
470 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to 471 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
471 * allow for the possibility of usermode upcalls messing up our count 472 * allow for the possibility of usermode upcalls messing up our count
472 * of interrupt nesting level during the busy period that is just 473 * of interrupt nesting level during the busy period that is just
473 * now starting. 474 * now starting.
474 */ 475 */
475 void rcu_idle_exit(void) 476 void rcu_idle_exit(void)
476 { 477 {
477 unsigned long flags; 478 unsigned long flags;
478 struct rcu_dynticks *rdtp; 479 struct rcu_dynticks *rdtp;
479 long long oldval; 480 long long oldval;
480 481
481 local_irq_save(flags); 482 local_irq_save(flags);
482 rdtp = &__get_cpu_var(rcu_dynticks); 483 rdtp = &__get_cpu_var(rcu_dynticks);
483 oldval = rdtp->dynticks_nesting; 484 oldval = rdtp->dynticks_nesting;
484 WARN_ON_ONCE(oldval != 0); 485 WARN_ON_ONCE(oldval != 0);
485 rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; 486 rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
486 rcu_idle_exit_common(rdtp, oldval); 487 rcu_idle_exit_common(rdtp, oldval);
487 local_irq_restore(flags); 488 local_irq_restore(flags);
488 } 489 }
489 490
490 /** 491 /**
491 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle 492 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
492 * 493 *
493 * Enter an interrupt handler, which might possibly result in exiting 494 * Enter an interrupt handler, which might possibly result in exiting
494 * idle mode, in other words, entering the mode in which read-side critical 495 * idle mode, in other words, entering the mode in which read-side critical
495 * sections can occur. 496 * sections can occur.
496 * 497 *
497 * Note that the Linux kernel is fully capable of entering an interrupt 498 * Note that the Linux kernel is fully capable of entering an interrupt
498 * handler that it never exits, for example when doing upcalls to 499 * handler that it never exits, for example when doing upcalls to
499 * user mode! This code assumes that the idle loop never does upcalls to 500 * user mode! This code assumes that the idle loop never does upcalls to
500 * user mode. If your architecture does do upcalls from the idle loop (or 501 * user mode. If your architecture does do upcalls from the idle loop (or
501 * does anything else that results in unbalanced calls to the irq_enter() 502 * does anything else that results in unbalanced calls to the irq_enter()
502 * and irq_exit() functions), RCU will give you what you deserve, good 503 * and irq_exit() functions), RCU will give you what you deserve, good
503 * and hard. But very infrequently and irreproducibly. 504 * and hard. But very infrequently and irreproducibly.
504 * 505 *
505 * Use things like work queues to work around this limitation. 506 * Use things like work queues to work around this limitation.
506 * 507 *
507 * You have been warned. 508 * You have been warned.
508 */ 509 */
509 void rcu_irq_enter(void) 510 void rcu_irq_enter(void)
510 { 511 {
511 unsigned long flags; 512 unsigned long flags;
512 struct rcu_dynticks *rdtp; 513 struct rcu_dynticks *rdtp;
513 long long oldval; 514 long long oldval;
514 515
515 local_irq_save(flags); 516 local_irq_save(flags);
516 rdtp = &__get_cpu_var(rcu_dynticks); 517 rdtp = &__get_cpu_var(rcu_dynticks);
517 oldval = rdtp->dynticks_nesting; 518 oldval = rdtp->dynticks_nesting;
518 rdtp->dynticks_nesting++; 519 rdtp->dynticks_nesting++;
519 WARN_ON_ONCE(rdtp->dynticks_nesting == 0); 520 WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
520 rcu_idle_exit_common(rdtp, oldval); 521 rcu_idle_exit_common(rdtp, oldval);
521 local_irq_restore(flags); 522 local_irq_restore(flags);
522 } 523 }
523 524
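rcu_irq_enter() and rcu_irq_exit(), together with the idle entry/exit paths above, manage ->dynticks_nesting so that only the transition between zero and non-zero actually flips the CPU's idle status; nested interrupts taken from idle merely move the count up and down. A toy single-threaded model of that bookkeeping is sketched below; all names are invented for the illustration.

#include <stdio.h>
#include <assert.h>

static long long nesting;		/* 0 means "really idle" */

static void model_idle_enter(void)  { nesting = 0; }
static void model_irq_enter(void)   { nesting++; assert(nesting != 0); }
static void model_irq_exit(void)    { nesting--; assert(nesting >= 0); }
static int  model_cpu_is_idle(void) { return nesting == 0; }

int main(void)
{
	model_idle_enter();
	printf("idle loop:        idle=%d\n", model_cpu_is_idle()); /* 1 */
	model_irq_enter();
	printf("in irq from idle: idle=%d\n", model_cpu_is_idle()); /* 0 */
	model_irq_exit();
	printf("back to idle:     idle=%d\n", model_cpu_is_idle()); /* 1 */
	return 0;
}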
524 /** 525 /**
525 * rcu_nmi_enter - inform RCU of entry to NMI context 526 * rcu_nmi_enter - inform RCU of entry to NMI context
526 * 527 *
527 * If the CPU was idle with dynamic ticks active, and there is no 528 * If the CPU was idle with dynamic ticks active, and there is no
528 * irq handler running, this updates rdtp->dynticks_nmi to let the 529 * irq handler running, this updates rdtp->dynticks_nmi to let the
529 * RCU grace-period handling know that the CPU is active. 530 * RCU grace-period handling know that the CPU is active.
530 */ 531 */
531 void rcu_nmi_enter(void) 532 void rcu_nmi_enter(void)
532 { 533 {
533 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 534 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
534 535
535 if (rdtp->dynticks_nmi_nesting == 0 && 536 if (rdtp->dynticks_nmi_nesting == 0 &&
536 (atomic_read(&rdtp->dynticks) & 0x1)) 537 (atomic_read(&rdtp->dynticks) & 0x1))
537 return; 538 return;
538 rdtp->dynticks_nmi_nesting++; 539 rdtp->dynticks_nmi_nesting++;
539 smp_mb__before_atomic_inc(); /* Force delay from prior write. */ 540 smp_mb__before_atomic_inc(); /* Force delay from prior write. */
540 atomic_inc(&rdtp->dynticks); 541 atomic_inc(&rdtp->dynticks);
541 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 542 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
542 smp_mb__after_atomic_inc(); /* See above. */ 543 smp_mb__after_atomic_inc(); /* See above. */
543 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 544 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
544 } 545 }
545 546
546 /** 547 /**
547 * rcu_nmi_exit - inform RCU of exit from NMI context 548 * rcu_nmi_exit - inform RCU of exit from NMI context
548 * 549 *
549 * If the CPU was idle with dynamic ticks active, and there is no 550 * If the CPU was idle with dynamic ticks active, and there is no
550 * irq handler running, this updates rdtp->dynticks_nmi to let the 551 * irq handler running, this updates rdtp->dynticks_nmi to let the
551 * RCU grace-period handling know that the CPU is no longer active. 552 * RCU grace-period handling know that the CPU is no longer active.
552 */ 553 */
553 void rcu_nmi_exit(void) 554 void rcu_nmi_exit(void)
554 { 555 {
555 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); 556 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
556 557
557 if (rdtp->dynticks_nmi_nesting == 0 || 558 if (rdtp->dynticks_nmi_nesting == 0 ||
558 --rdtp->dynticks_nmi_nesting != 0) 559 --rdtp->dynticks_nmi_nesting != 0)
559 return; 560 return;
560 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 561 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
561 smp_mb__before_atomic_inc(); /* See above. */ 562 smp_mb__before_atomic_inc(); /* See above. */
562 atomic_inc(&rdtp->dynticks); 563 atomic_inc(&rdtp->dynticks);
563 smp_mb__after_atomic_inc(); /* Force delay to next write. */ 564 smp_mb__after_atomic_inc(); /* Force delay to next write. */
564 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 565 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
565 } 566 }
566 567
567 #ifdef CONFIG_PROVE_RCU 568 #ifdef CONFIG_PROVE_RCU
568 569
569 /** 570 /**
570 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle 571 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
571 * 572 *
572 * If the current CPU is in its idle loop and is neither in an interrupt 573 * If the current CPU is in its idle loop and is neither in an interrupt
573 * nor an NMI handler, return true. 574 * nor an NMI handler, return true.
574 */ 575 */
575 int rcu_is_cpu_idle(void) 576 int rcu_is_cpu_idle(void)
576 { 577 {
577 int ret; 578 int ret;
578 579
579 preempt_disable(); 580 preempt_disable();
580 ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; 581 ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
581 preempt_enable(); 582 preempt_enable();
582 return ret; 583 return ret;
583 } 584 }
584 EXPORT_SYMBOL(rcu_is_cpu_idle); 585 EXPORT_SYMBOL(rcu_is_cpu_idle);
585 586
586 #endif /* #ifdef CONFIG_PROVE_RCU */ 587 #endif /* #ifdef CONFIG_PROVE_RCU */
587 588
588 /** 589 /**
589 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle 590 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
590 * 591 *
591 * If the current CPU is idle or running at a first-level (not nested) 592 * If the current CPU is idle or running at a first-level (not nested)
592 * interrupt from idle, return true. The caller must have at least 593 * interrupt from idle, return true. The caller must have at least
593 * disabled preemption. 594 * disabled preemption.
594 */ 595 */
595 int rcu_is_cpu_rrupt_from_idle(void) 596 int rcu_is_cpu_rrupt_from_idle(void)
596 { 597 {
597 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; 598 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
598 } 599 }
599 600
600 #ifdef CONFIG_SMP 601 #ifdef CONFIG_SMP
601 602
602 /* 603 /*
603 * Snapshot the specified CPU's dynticks counter so that we can later 604 * Snapshot the specified CPU's dynticks counter so that we can later
604 * credit them with an implicit quiescent state. Return 1 if this CPU 605 * credit them with an implicit quiescent state. Return 1 if this CPU
605 * is in dynticks idle mode, which is an extended quiescent state. 606 * is in dynticks idle mode, which is an extended quiescent state.
606 */ 607 */
607 static int dyntick_save_progress_counter(struct rcu_data *rdp) 608 static int dyntick_save_progress_counter(struct rcu_data *rdp)
608 { 609 {
609 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); 610 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
610 return (rdp->dynticks_snap & 0x1) == 0; 611 return (rdp->dynticks_snap & 0x1) == 0;
611 } 612 }
612 613
613 /* 614 /*
614 * Return true if the specified CPU has passed through a quiescent 615 * Return true if the specified CPU has passed through a quiescent
615 * state by virtue of being in or having passed through a dynticks 616 * state by virtue of being in or having passed through a dynticks
616 * idle state since the last call to dyntick_save_progress_counter() 617 * idle state since the last call to dyntick_save_progress_counter()
617 * for this same CPU. 618 * for this same CPU.
618 */ 619 */
619 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 620 static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
620 { 621 {
621 unsigned int curr; 622 unsigned int curr;
622 unsigned int snap; 623 unsigned int snap;
623 624
624 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); 625 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
625 snap = (unsigned int)rdp->dynticks_snap; 626 snap = (unsigned int)rdp->dynticks_snap;
626 627
627 /* 628 /*
628 * If the CPU passed through or entered a dynticks idle phase with 629 * If the CPU passed through or entered a dynticks idle phase with
629 * no active irq/NMI handlers, then we can safely pretend that the CPU 630 * no active irq/NMI handlers, then we can safely pretend that the CPU
630 * already acknowledged the request to pass through a quiescent 631 * already acknowledged the request to pass through a quiescent
631 * state. Either way, that CPU cannot possibly be in an RCU 632 * state. Either way, that CPU cannot possibly be in an RCU
632 * read-side critical section that started before the beginning 633 * read-side critical section that started before the beginning
633 * of the current RCU grace period. 634 * of the current RCU grace period.
634 */ 635 */
635 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { 636 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
636 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); 637 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
637 rdp->dynticks_fqs++; 638 rdp->dynticks_fqs++;
638 return 1; 639 return 1;
639 } 640 }
640 641
641 /* Go check for the CPU being offline. */ 642 /* Go check for the CPU being offline. */
642 return rcu_implicit_offline_qs(rdp); 643 return rcu_implicit_offline_qs(rdp);
643 } 644 }
644 645
645 #endif /* #ifdef CONFIG_SMP */ 646 #endif /* #ifdef CONFIG_SMP */
646 647
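The force-quiescent-state path above samples ->dynticks in dyntick_save_progress_counter() and later re-reads it in rcu_implicit_dynticks_qs(): the CPU gets credit for an implicit quiescent state if the counter is currently even (idle right now) or has advanced by at least two since the snapshot (it passed through idle at some point). The sketch below reproduces just that test; the cast-based wraparound-safe comparison is an assumption standing in for the kernel's UINT_CMP_GE() macro, not a copy of it.

#include <stdio.h>

/* Wraparound-safe "a >= b" on an unsigned counter (an assumption). */
static int uint_cmp_ge(unsigned int a, unsigned int b)
{
	return (int)(a - b) >= 0;
}

/*
 * A CPU counts as having passed a quiescent state if it is idle now
 * (even counter) or has gone through idle since the snapshot
 * (counter advanced by two or more).
 */
static int passed_qs(unsigned int curr, unsigned int snap)
{
	return (curr & 0x1) == 0 || uint_cmp_ge(curr, snap + 2);
}

int main(void)
{
	printf("%d\n", passed_qs(5, 5));          /* 0: still busy, never idled */
	printf("%d\n", passed_qs(6, 5));          /* 1: idle right now          */
	printf("%d\n", passed_qs(7, 5));          /* 1: went idle and came back */
	printf("%d\n", passed_qs(1, 0xffffffff)); /* 1: survives wraparound     */
	return 0;
}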
647 int rcu_cpu_stall_suppress __read_mostly; 648 int rcu_cpu_stall_suppress __read_mostly;
648 649
649 static void record_gp_stall_check_time(struct rcu_state *rsp) 650 static void record_gp_stall_check_time(struct rcu_state *rsp)
650 { 651 {
651 rsp->gp_start = jiffies; 652 rsp->gp_start = jiffies;
652 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; 653 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
653 } 654 }
654 655
655 static void print_other_cpu_stall(struct rcu_state *rsp) 656 static void print_other_cpu_stall(struct rcu_state *rsp)
656 { 657 {
657 int cpu; 658 int cpu;
658 long delta; 659 long delta;
659 unsigned long flags; 660 unsigned long flags;
660 int ndetected; 661 int ndetected;
661 struct rcu_node *rnp = rcu_get_root(rsp); 662 struct rcu_node *rnp = rcu_get_root(rsp);
662 663
663 /* Only let one CPU complain about others per time interval. */ 664 /* Only let one CPU complain about others per time interval. */
664 665
665 raw_spin_lock_irqsave(&rnp->lock, flags); 666 raw_spin_lock_irqsave(&rnp->lock, flags);
666 delta = jiffies - rsp->jiffies_stall; 667 delta = jiffies - rsp->jiffies_stall;
667 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 668 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
668 raw_spin_unlock_irqrestore(&rnp->lock, flags); 669 raw_spin_unlock_irqrestore(&rnp->lock, flags);
669 return; 670 return;
670 } 671 }
671 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 672 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
672 673
673 /* 674 /*
674 * Now rat on any tasks that got kicked up to the root rcu_node 675 * Now rat on any tasks that got kicked up to the root rcu_node
675 * due to CPU offlining. 676 * due to CPU offlining.
676 */ 677 */
677 ndetected = rcu_print_task_stall(rnp); 678 ndetected = rcu_print_task_stall(rnp);
678 raw_spin_unlock_irqrestore(&rnp->lock, flags); 679 raw_spin_unlock_irqrestore(&rnp->lock, flags);
679 680
680 /* 681 /*
681 * OK, time to rat on our buddy... 682 * OK, time to rat on our buddy...
682 * See Documentation/RCU/stallwarn.txt for info on how to debug 683 * See Documentation/RCU/stallwarn.txt for info on how to debug
683 * RCU CPU stall warnings. 684 * RCU CPU stall warnings.
684 */ 685 */
685 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", 686 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
686 rsp->name); 687 rsp->name);
687 rcu_for_each_leaf_node(rsp, rnp) { 688 rcu_for_each_leaf_node(rsp, rnp) {
688 raw_spin_lock_irqsave(&rnp->lock, flags); 689 raw_spin_lock_irqsave(&rnp->lock, flags);
689 ndetected += rcu_print_task_stall(rnp); 690 ndetected += rcu_print_task_stall(rnp);
690 raw_spin_unlock_irqrestore(&rnp->lock, flags); 691 raw_spin_unlock_irqrestore(&rnp->lock, flags);
691 if (rnp->qsmask == 0) 692 if (rnp->qsmask == 0)
692 continue; 693 continue;
693 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 694 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
694 if (rnp->qsmask & (1UL << cpu)) { 695 if (rnp->qsmask & (1UL << cpu)) {
695 printk(" %d", rnp->grplo + cpu); 696 printk(" %d", rnp->grplo + cpu);
696 ndetected++; 697 ndetected++;
697 } 698 }
698 } 699 }
699 printk("} (detected by %d, t=%ld jiffies)\n", 700 printk("} (detected by %d, t=%ld jiffies)\n",
700 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 701 smp_processor_id(), (long)(jiffies - rsp->gp_start));
701 if (ndetected == 0) 702 if (ndetected == 0)
702 printk(KERN_ERR "INFO: Stall ended before state dump start\n"); 703 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
703 else if (!trigger_all_cpu_backtrace()) 704 else if (!trigger_all_cpu_backtrace())
704 dump_stack(); 705 dump_stack();
705 706
706 /* If so configured, complain about tasks blocking the grace period. */ 707 /* If so configured, complain about tasks blocking the grace period. */
707 708
708 rcu_print_detail_task_stall(rsp); 709 rcu_print_detail_task_stall(rsp);
709 710
710 force_quiescent_state(rsp, 0); /* Kick them all. */ 711 force_quiescent_state(rsp, 0); /* Kick them all. */
711 } 712 }
712 713
713 static void print_cpu_stall(struct rcu_state *rsp) 714 static void print_cpu_stall(struct rcu_state *rsp)
714 { 715 {
715 unsigned long flags; 716 unsigned long flags;
716 struct rcu_node *rnp = rcu_get_root(rsp); 717 struct rcu_node *rnp = rcu_get_root(rsp);
717 718
718 /* 719 /*
719 * OK, time to rat on ourselves... 720 * OK, time to rat on ourselves...
720 * See Documentation/RCU/stallwarn.txt for info on how to debug 721 * See Documentation/RCU/stallwarn.txt for info on how to debug
721 * RCU CPU stall warnings. 722 * RCU CPU stall warnings.
722 */ 723 */
723 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 724 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
724 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 725 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
725 if (!trigger_all_cpu_backtrace()) 726 if (!trigger_all_cpu_backtrace())
726 dump_stack(); 727 dump_stack();
727 728
728 raw_spin_lock_irqsave(&rnp->lock, flags); 729 raw_spin_lock_irqsave(&rnp->lock, flags);
729 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 730 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
730 rsp->jiffies_stall = 731 rsp->jiffies_stall =
731 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 732 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
732 raw_spin_unlock_irqrestore(&rnp->lock, flags); 733 raw_spin_unlock_irqrestore(&rnp->lock, flags);
733 734
734 set_need_resched(); /* kick ourselves to get things going. */ 735 set_need_resched(); /* kick ourselves to get things going. */
735 } 736 }
736 737
737 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 738 static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
738 { 739 {
739 unsigned long j; 740 unsigned long j;
740 unsigned long js; 741 unsigned long js;
741 struct rcu_node *rnp; 742 struct rcu_node *rnp;
742 743
743 if (rcu_cpu_stall_suppress) 744 if (rcu_cpu_stall_suppress)
744 return; 745 return;
745 j = ACCESS_ONCE(jiffies); 746 j = ACCESS_ONCE(jiffies);
746 js = ACCESS_ONCE(rsp->jiffies_stall); 747 js = ACCESS_ONCE(rsp->jiffies_stall);
747 rnp = rdp->mynode; 748 rnp = rdp->mynode;
748 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { 749 if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
749 750
750 /* We haven't checked in, so go dump stack. */ 751 /* We haven't checked in, so go dump stack. */
751 print_cpu_stall(rsp); 752 print_cpu_stall(rsp);
752 753
753 } else if (rcu_gp_in_progress(rsp) && 754 } else if (rcu_gp_in_progress(rsp) &&
754 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { 755 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
755 756
756 /* They had a few time units to dump stack, so complain. */ 757 /* They had a few time units to dump stack, so complain. */
757 print_other_cpu_stall(rsp); 758 print_other_cpu_stall(rsp);
758 } 759 }
759 } 760 }
760 761
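check_cpu_stall() encodes a two-stage policy: the CPU that still owes a quiescent state complains about itself once jiffies reaches rsp->jiffies_stall, while other CPUs wait an additional RCU_STALL_RAT_DELAY before complaining on its behalf, giving the stalled CPU first crack at dumping its own stack. A compressed sketch of that decision follows, with an illustrative delay value and a stand-in wraparound-safe comparison rather than the kernel's ULONG_CMP_GE().

#include <stdio.h>

#define STALL_RAT_DELAY 2	/* illustrative value, in jiffies */

/* Wraparound-safe "a >= b" for jiffies-style counters (an assumption). */
static int time_after_eq_ul(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

static const char *stall_action(unsigned long j, unsigned long js,
				int self_pending, int gp_in_progress)
{
	if (self_pending && time_after_eq_ul(j, js))
		return "print_cpu_stall";	/* we are the culprit */
	if (gp_in_progress && time_after_eq_ul(j, js + STALL_RAT_DELAY))
		return "print_other_cpu_stall";	/* complain about another CPU */
	return "nothing";
}

int main(void)
{
	unsigned long js = 1000;	/* hypothetical stall deadline */

	printf("%s\n", stall_action(999,  js, 1, 1));	/* nothing        */
	printf("%s\n", stall_action(1000, js, 1, 1));	/* self-report    */
	printf("%s\n", stall_action(1002, js, 0, 1));	/* report another */
	return 0;
}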
761 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 762 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
762 { 763 {
763 rcu_cpu_stall_suppress = 1; 764 rcu_cpu_stall_suppress = 1;
764 return NOTIFY_DONE; 765 return NOTIFY_DONE;
765 } 766 }
766 767
767 /** 768 /**
768 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period 769 * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
769 * 770 *
770 * Set the stall-warning timeout way off into the future, thus preventing 771 * Set the stall-warning timeout way off into the future, thus preventing
771 * any RCU CPU stall-warning messages from appearing in the current set of 772 * any RCU CPU stall-warning messages from appearing in the current set of
772 * RCU grace periods. 773 * RCU grace periods.
773 * 774 *
774 * The caller must disable hard irqs. 775 * The caller must disable hard irqs.
775 */ 776 */
776 void rcu_cpu_stall_reset(void) 777 void rcu_cpu_stall_reset(void)
777 { 778 {
778 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; 779 rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
779 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; 780 rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
780 rcu_preempt_stall_reset(); 781 rcu_preempt_stall_reset();
781 } 782 }
782 783
783 static struct notifier_block rcu_panic_block = { 784 static struct notifier_block rcu_panic_block = {
784 .notifier_call = rcu_panic, 785 .notifier_call = rcu_panic,
785 }; 786 };
786 787
787 static void __init check_cpu_stall_init(void) 788 static void __init check_cpu_stall_init(void)
788 { 789 {
789 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 790 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
790 } 791 }
791 792
792 /* 793 /*
793 * Update CPU-local rcu_data state to record the newly noticed grace period. 794 * Update CPU-local rcu_data state to record the newly noticed grace period.
794 * This is used both when we started the grace period and when we notice 795 * This is used both when we started the grace period and when we notice
795 * that someone else started the grace period. The caller must hold the 796 * that someone else started the grace period. The caller must hold the
796 * ->lock of the leaf rcu_node structure corresponding to the current CPU, 797 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
797 * and must have irqs disabled. 798 * and must have irqs disabled.
798 */ 799 */
799 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 800 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
800 { 801 {
801 if (rdp->gpnum != rnp->gpnum) { 802 if (rdp->gpnum != rnp->gpnum) {
802 /* 803 /*
803 * If the current grace period is waiting for this CPU, 804 * If the current grace period is waiting for this CPU,
804 * set up to detect a quiescent state, otherwise don't 805 * set up to detect a quiescent state, otherwise don't
805 * go looking for one. 806 * go looking for one.
806 */ 807 */
807 rdp->gpnum = rnp->gpnum; 808 rdp->gpnum = rnp->gpnum;
808 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); 809 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
809 if (rnp->qsmask & rdp->grpmask) { 810 if (rnp->qsmask & rdp->grpmask) {
810 rdp->qs_pending = 1; 811 rdp->qs_pending = 1;
811 rdp->passed_quiesce = 0; 812 rdp->passed_quiesce = 0;
812 } else 813 } else
813 rdp->qs_pending = 0; 814 rdp->qs_pending = 0;
814 } 815 }
815 } 816 }
816 817
817 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) 818 static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
818 { 819 {
819 unsigned long flags; 820 unsigned long flags;
820 struct rcu_node *rnp; 821 struct rcu_node *rnp;
821 822
822 local_irq_save(flags); 823 local_irq_save(flags);
823 rnp = rdp->mynode; 824 rnp = rdp->mynode;
824 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ 825 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
825 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 826 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
826 local_irq_restore(flags); 827 local_irq_restore(flags);
827 return; 828 return;
828 } 829 }
829 __note_new_gpnum(rsp, rnp, rdp); 830 __note_new_gpnum(rsp, rnp, rdp);
830 raw_spin_unlock_irqrestore(&rnp->lock, flags); 831 raw_spin_unlock_irqrestore(&rnp->lock, flags);
831 } 832 }
832 833
833 /* 834 /*
834 * Did someone else start a new RCU grace period since we last 835 * Did someone else start a new RCU grace period since we last
835 * checked? Update local state appropriately if so. Must be called 836 * checked? Update local state appropriately if so. Must be called
836 * on the CPU corresponding to rdp. 837 * on the CPU corresponding to rdp.
837 */ 838 */
838 static int 839 static int
839 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) 840 check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
840 { 841 {
841 unsigned long flags; 842 unsigned long flags;
842 int ret = 0; 843 int ret = 0;
843 844
844 local_irq_save(flags); 845 local_irq_save(flags);
845 if (rdp->gpnum != rsp->gpnum) { 846 if (rdp->gpnum != rsp->gpnum) {
846 note_new_gpnum(rsp, rdp); 847 note_new_gpnum(rsp, rdp);
847 ret = 1; 848 ret = 1;
848 } 849 }
849 local_irq_restore(flags); 850 local_irq_restore(flags);
850 return ret; 851 return ret;
851 } 852 }
852 853
853 /* 854 /*
854 * Advance this CPU's callbacks, but only if the current grace period 855 * Advance this CPU's callbacks, but only if the current grace period
855 * has ended. This may be called only from the CPU to whom the rdp 856 * has ended. This may be called only from the CPU to whom the rdp
856 * belongs. In addition, the corresponding leaf rcu_node structure's 857 * belongs. In addition, the corresponding leaf rcu_node structure's
857 * ->lock must be held by the caller, with irqs disabled. 858 * ->lock must be held by the caller, with irqs disabled.
858 */ 859 */
859 static void 860 static void
860 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 861 __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
861 { 862 {
862 /* Did another grace period end? */ 863 /* Did another grace period end? */
863 if (rdp->completed != rnp->completed) { 864 if (rdp->completed != rnp->completed) {
864 865
865 /* Advance callbacks. No harm if list empty. */ 866 /* Advance callbacks. No harm if list empty. */
866 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; 867 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
867 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; 868 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
868 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 869 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
869 870
870 /* Remember that we saw this grace-period completion. */ 871 /* Remember that we saw this grace-period completion. */
871 rdp->completed = rnp->completed; 872 rdp->completed = rnp->completed;
872 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); 873 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
873 874
874 /* 875 /*
875 * If we were in an extended quiescent state, we may have 876 * If we were in an extended quiescent state, we may have
876 * missed some grace periods that other CPUs handled on 877 * missed some grace periods that other CPUs handled on
877 * our behalf. Catch up with this state to avoid noting 878 * our behalf. Catch up with this state to avoid noting
878 * spurious new grace periods. If another grace period 879 * spurious new grace periods. If another grace period
879 * has started, then rnp->gpnum will have advanced, so 880 * has started, then rnp->gpnum will have advanced, so
880 * we will detect this later on. 881 * we will detect this later on.
881 */ 882 */
882 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) 883 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
883 rdp->gpnum = rdp->completed; 884 rdp->gpnum = rdp->completed;
884 885
885 /* 886 /*
886 * If RCU does not need a quiescent state from this CPU, 887 * If RCU does not need a quiescent state from this CPU,
887 * then make sure that this CPU doesn't go looking for one. 888 * then make sure that this CPU doesn't go looking for one.
888 */ 889 */
889 if ((rnp->qsmask & rdp->grpmask) == 0) 890 if ((rnp->qsmask & rdp->grpmask) == 0)
890 rdp->qs_pending = 0; 891 rdp->qs_pending = 0;
891 } 892 }
892 } 893 }
893 894
894 /* 895 /*
895 * Advance this CPU's callbacks, but only if the current grace period 896 * Advance this CPU's callbacks, but only if the current grace period
896 * has ended. This may be called only from the CPU to whom the rdp 897 * has ended. This may be called only from the CPU to whom the rdp
897 * belongs. 898 * belongs.
898 */ 899 */
899 static void 900 static void
900 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) 901 rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
901 { 902 {
902 unsigned long flags; 903 unsigned long flags;
903 struct rcu_node *rnp; 904 struct rcu_node *rnp;
904 905
905 local_irq_save(flags); 906 local_irq_save(flags);
906 rnp = rdp->mynode; 907 rnp = rdp->mynode;
907 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ 908 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
908 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ 909 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
909 local_irq_restore(flags); 910 local_irq_restore(flags);
910 return; 911 return;
911 } 912 }
912 __rcu_process_gp_end(rsp, rnp, rdp); 913 __rcu_process_gp_end(rsp, rnp, rdp);
913 raw_spin_unlock_irqrestore(&rnp->lock, flags); 914 raw_spin_unlock_irqrestore(&rnp->lock, flags);
914 } 915 }
915 916
916 /* 917 /*
917 * Do per-CPU grace-period initialization for running CPU. The caller 918 * Do per-CPU grace-period initialization for running CPU. The caller
918 * must hold the lock of the leaf rcu_node structure corresponding to 919 * must hold the lock of the leaf rcu_node structure corresponding to
919 * this CPU. 920 * this CPU.
920 */ 921 */
921 static void 922 static void
922 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) 923 rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
923 { 924 {
924 /* Prior grace period ended, so advance callbacks for current CPU. */ 925 /* Prior grace period ended, so advance callbacks for current CPU. */
925 __rcu_process_gp_end(rsp, rnp, rdp); 926 __rcu_process_gp_end(rsp, rnp, rdp);
926 927
927 /* 928 /*
928 * Because this CPU just now started the new grace period, we know 929 * Because this CPU just now started the new grace period, we know
929 * that all of its callbacks will be covered by this upcoming grace 930 * that all of its callbacks will be covered by this upcoming grace
930 * period, even the ones that were registered arbitrarily recently. 931 * period, even the ones that were registered arbitrarily recently.
931 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. 932 * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL.
932 * 933 *
933 * Other CPUs cannot be sure exactly when the grace period started. 934 * Other CPUs cannot be sure exactly when the grace period started.
934 * Therefore, their recently registered callbacks must pass through 935 * Therefore, their recently registered callbacks must pass through
935 * an additional RCU_NEXT_READY stage, so that they will be handled 936 * an additional RCU_NEXT_READY stage, so that they will be handled
936 * by the next RCU grace period. 937 * by the next RCU grace period.
937 */ 938 */
938 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 939 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
939 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 940 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
940 941
941 /* Set state so that this CPU will detect the next quiescent state. */ 942 /* Set state so that this CPU will detect the next quiescent state. */
942 __note_new_gpnum(rsp, rnp, rdp); 943 __note_new_gpnum(rsp, rnp, rdp);
943 } 944 }
944 945
945 /* 946 /*
946 * Start a new RCU grace period if warranted, re-initializing the hierarchy 947 * Start a new RCU grace period if warranted, re-initializing the hierarchy
947 * in preparation for detecting the next grace period. The caller must hold 948 * in preparation for detecting the next grace period. The caller must hold
948 * the root node's ->lock, which is released before return. Hard irqs must 949 * the root node's ->lock, which is released before return. Hard irqs must
949 * be disabled. 950 * be disabled.
950 */ 951 */
951 static void 952 static void
952 rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 953 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
953 __releases(rcu_get_root(rsp)->lock) 954 __releases(rcu_get_root(rsp)->lock)
954 { 955 {
955 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 956 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
956 struct rcu_node *rnp = rcu_get_root(rsp); 957 struct rcu_node *rnp = rcu_get_root(rsp);
957 958
958 if (!rcu_scheduler_fully_active || 959 if (!rcu_scheduler_fully_active ||
959 !cpu_needs_another_gp(rsp, rdp)) { 960 !cpu_needs_another_gp(rsp, rdp)) {
960 /* 961 /*
961 * Either the scheduler hasn't yet spawned the first 962 * Either the scheduler hasn't yet spawned the first
962 * non-idle task or this CPU does not need another 963 * non-idle task or this CPU does not need another
963 * grace period. Either way, don't start a new grace 964 * grace period. Either way, don't start a new grace
964 * period. 965 * period.
965 */ 966 */
966 raw_spin_unlock_irqrestore(&rnp->lock, flags); 967 raw_spin_unlock_irqrestore(&rnp->lock, flags);
967 return; 968 return;
968 } 969 }
969 970
970 if (rsp->fqs_active) { 971 if (rsp->fqs_active) {
971 /* 972 /*
972 * This CPU needs a grace period, but force_quiescent_state() 973 * This CPU needs a grace period, but force_quiescent_state()
973 * is running. Tell it to start one on this CPU's behalf. 974 * is running. Tell it to start one on this CPU's behalf.
974 */ 975 */
975 rsp->fqs_need_gp = 1; 976 rsp->fqs_need_gp = 1;
976 raw_spin_unlock_irqrestore(&rnp->lock, flags); 977 raw_spin_unlock_irqrestore(&rnp->lock, flags);
977 return; 978 return;
978 } 979 }
979 980
980 /* Advance to a new grace period and initialize state. */ 981 /* Advance to a new grace period and initialize state. */
981 rsp->gpnum++; 982 rsp->gpnum++;
982 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); 983 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
983 WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); 984 WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
984 rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 985 rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
985 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 986 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
986 record_gp_stall_check_time(rsp); 987 record_gp_stall_check_time(rsp);
987 988
988 /* Special-case the common single-level case. */ 989 /* Special-case the common single-level case. */
989 if (NUM_RCU_NODES == 1) { 990 if (NUM_RCU_NODES == 1) {
990 rcu_preempt_check_blocked_tasks(rnp); 991 rcu_preempt_check_blocked_tasks(rnp);
991 rnp->qsmask = rnp->qsmaskinit; 992 rnp->qsmask = rnp->qsmaskinit;
992 rnp->gpnum = rsp->gpnum; 993 rnp->gpnum = rsp->gpnum;
993 rnp->completed = rsp->completed; 994 rnp->completed = rsp->completed;
994 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ 995 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
995 rcu_start_gp_per_cpu(rsp, rnp, rdp); 996 rcu_start_gp_per_cpu(rsp, rnp, rdp);
996 rcu_preempt_boost_start_gp(rnp); 997 rcu_preempt_boost_start_gp(rnp);
997 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 998 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
998 rnp->level, rnp->grplo, 999 rnp->level, rnp->grplo,
999 rnp->grphi, rnp->qsmask); 1000 rnp->grphi, rnp->qsmask);
1000 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1001 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1001 return; 1002 return;
1002 } 1003 }
1003 1004
1004 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ 1005 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
1005 1006
1006 1007
1007 /* Exclude any concurrent CPU-hotplug operations. */ 1008 /* Exclude any concurrent CPU-hotplug operations. */
1008 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 1009 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
1009 1010
1010 /* 1011 /*
1011 * Set the quiescent-state-needed bits in all the rcu_node 1012 * Set the quiescent-state-needed bits in all the rcu_node
1012 * structures for all currently online CPUs in breadth-first 1013 * structures for all currently online CPUs in breadth-first
1013 * order, starting from the root rcu_node structure. This 1014 * order, starting from the root rcu_node structure. This
1014 * operation relies on the layout of the hierarchy within the 1015 * operation relies on the layout of the hierarchy within the
1015 * rsp->node[] array. Note that other CPUs will access only 1016 * rsp->node[] array. Note that other CPUs will access only
1016 * the leaves of the hierarchy, which still indicate that no 1017 * the leaves of the hierarchy, which still indicate that no
1017 * grace period is in progress, at least until the corresponding 1018 * grace period is in progress, at least until the corresponding
1018 * leaf node has been initialized. In addition, we have excluded 1019 * leaf node has been initialized. In addition, we have excluded
1019 * CPU-hotplug operations. 1020 * CPU-hotplug operations.
1020 * 1021 *
1021 * Note that the grace period cannot complete until we finish 1022 * Note that the grace period cannot complete until we finish
1022 * the initialization process, as there will be at least one 1023 * the initialization process, as there will be at least one
1023 * qsmask bit set in the root node until that time, namely the 1024 * qsmask bit set in the root node until that time, namely the
1024 * one corresponding to this CPU, due to the fact that we have 1025 * one corresponding to this CPU, due to the fact that we have
1025 * irqs disabled. 1026 * irqs disabled.
1026 */ 1027 */
1027 rcu_for_each_node_breadth_first(rsp, rnp) { 1028 rcu_for_each_node_breadth_first(rsp, rnp) {
1028 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1029 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1029 rcu_preempt_check_blocked_tasks(rnp); 1030 rcu_preempt_check_blocked_tasks(rnp);
1030 rnp->qsmask = rnp->qsmaskinit; 1031 rnp->qsmask = rnp->qsmaskinit;
1031 rnp->gpnum = rsp->gpnum; 1032 rnp->gpnum = rsp->gpnum;
1032 rnp->completed = rsp->completed; 1033 rnp->completed = rsp->completed;
1033 if (rnp == rdp->mynode) 1034 if (rnp == rdp->mynode)
1034 rcu_start_gp_per_cpu(rsp, rnp, rdp); 1035 rcu_start_gp_per_cpu(rsp, rnp, rdp);
1035 rcu_preempt_boost_start_gp(rnp); 1036 rcu_preempt_boost_start_gp(rnp);
1036 trace_rcu_grace_period_init(rsp->name, rnp->gpnum, 1037 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1037 rnp->level, rnp->grplo, 1038 rnp->level, rnp->grplo,
1038 rnp->grphi, rnp->qsmask); 1039 rnp->grphi, rnp->qsmask);
1039 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1040 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1040 } 1041 }
1041 1042
1042 rnp = rcu_get_root(rsp); 1043 rnp = rcu_get_root(rsp);
1043 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1044 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1044 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ 1045 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
1045 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1046 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1046 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 1047 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
1047 } 1048 }
1048 1049
1049 /* 1050 /*
1050 * Report a full set of quiescent states to the specified rcu_state 1051 * Report a full set of quiescent states to the specified rcu_state
1051 * data structure. This involves cleaning up after the prior grace 1052 * data structure. This involves cleaning up after the prior grace
1052 * period and letting rcu_start_gp() start up the next grace period 1053 * period and letting rcu_start_gp() start up the next grace period
1053 * if one is needed. Note that the caller must hold rnp->lock, as 1054 * if one is needed. Note that the caller must hold rnp->lock, as
1054 * required by rcu_start_gp(), which will release it. 1055 * required by rcu_start_gp(), which will release it.
1055 */ 1056 */
1056 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 1057 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1057 __releases(rcu_get_root(rsp)->lock) 1058 __releases(rcu_get_root(rsp)->lock)
1058 { 1059 {
1059 unsigned long gp_duration; 1060 unsigned long gp_duration;
1060 struct rcu_node *rnp = rcu_get_root(rsp); 1061 struct rcu_node *rnp = rcu_get_root(rsp);
1061 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1062 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1062 1063
1063 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1064 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1064 1065
1065 /* 1066 /*
1066 * Ensure that all grace-period and pre-grace-period activity 1067 * Ensure that all grace-period and pre-grace-period activity
1067 * is seen before the assignment to rsp->completed. 1068 * is seen before the assignment to rsp->completed.
1068 */ 1069 */
1069 smp_mb(); /* See above block comment. */ 1070 smp_mb(); /* See above block comment. */
1070 gp_duration = jiffies - rsp->gp_start; 1071 gp_duration = jiffies - rsp->gp_start;
1071 if (gp_duration > rsp->gp_max) 1072 if (gp_duration > rsp->gp_max)
1072 rsp->gp_max = gp_duration; 1073 rsp->gp_max = gp_duration;
1073 1074
1074 /* 1075 /*
1075 * We know the grace period is complete, but to everyone else 1076 * We know the grace period is complete, but to everyone else
1076 * it appears to still be ongoing. But it is also the case 1077 * it appears to still be ongoing. But it is also the case
1077 * that to everyone else it looks like there is nothing that 1078 * that to everyone else it looks like there is nothing that
1078 * they can do to advance the grace period. It is therefore 1079 * they can do to advance the grace period. It is therefore
1079 * safe for us to drop the lock in order to mark the grace 1080 * safe for us to drop the lock in order to mark the grace
1080 * period as completed in all of the rcu_node structures. 1081 * period as completed in all of the rcu_node structures.
1081 * 1082 *
1082 * But if this CPU needs another grace period, it will take 1083 * But if this CPU needs another grace period, it will take
1083 * care of this while initializing the next grace period. 1084 * care of this while initializing the next grace period.
1084 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL 1085 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
1085 * because the callbacks have not yet been advanced: Those 1086 * because the callbacks have not yet been advanced: Those
1086 * callbacks are waiting on the grace period that just now 1087 * callbacks are waiting on the grace period that just now
1087 * completed. 1088 * completed.
1088 */ 1089 */
1089 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { 1090 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
1090 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1091 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1091 1092
1092 /* 1093 /*
1093 * Propagate new ->completed value to rcu_node structures 1094 * Propagate new ->completed value to rcu_node structures
1094 * so that other CPUs don't have to wait until the start 1095 * so that other CPUs don't have to wait until the start
1095 * of the next grace period to process their callbacks. 1096 * of the next grace period to process their callbacks.
1096 */ 1097 */
1097 rcu_for_each_node_breadth_first(rsp, rnp) { 1098 rcu_for_each_node_breadth_first(rsp, rnp) {
1098 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1099 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1099 rnp->completed = rsp->gpnum; 1100 rnp->completed = rsp->gpnum;
1100 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1101 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1101 } 1102 }
1102 rnp = rcu_get_root(rsp); 1103 rnp = rcu_get_root(rsp);
1103 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1104 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1104 } 1105 }
1105 1106
1106 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ 1107 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
1107 trace_rcu_grace_period(rsp->name, rsp->completed, "end"); 1108 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
1108 rsp->fqs_state = RCU_GP_IDLE; 1109 rsp->fqs_state = RCU_GP_IDLE;
1109 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 1110 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
1110 } 1111 }
1111 1112
1112 /* 1113 /*
1113 * Similar to rcu_report_qs_rdp(), for which it is a helper function. 1114 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
1114 * Allows quiescent states for a group of CPUs to be reported at one go 1115 * Allows quiescent states for a group of CPUs to be reported at one go
1115 * to the specified rcu_node structure, though all the CPUs in the group 1116 * to the specified rcu_node structure, though all the CPUs in the group
1116 * must be represented by the same rcu_node structure (which need not be 1117 * must be represented by the same rcu_node structure (which need not be
1117 * a leaf rcu_node structure, though it often will be). That structure's 1118 * a leaf rcu_node structure, though it often will be). That structure's
1118 * lock must be held upon entry, and it is released before return. 1119 * lock must be held upon entry, and it is released before return.
1119 */ 1120 */
1120 static void 1121 static void
1121 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 1122 rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1122 struct rcu_node *rnp, unsigned long flags) 1123 struct rcu_node *rnp, unsigned long flags)
1123 __releases(rnp->lock) 1124 __releases(rnp->lock)
1124 { 1125 {
1125 struct rcu_node *rnp_c; 1126 struct rcu_node *rnp_c;
1126 1127
1127 /* Walk up the rcu_node hierarchy. */ 1128 /* Walk up the rcu_node hierarchy. */
1128 for (;;) { 1129 for (;;) {
1129 if (!(rnp->qsmask & mask)) { 1130 if (!(rnp->qsmask & mask)) {
1130 1131
1131 /* Our bit has already been cleared, so done. */ 1132 /* Our bit has already been cleared, so done. */
1132 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1133 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1133 return; 1134 return;
1134 } 1135 }
1135 rnp->qsmask &= ~mask; 1136 rnp->qsmask &= ~mask;
1136 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 1137 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1137 mask, rnp->qsmask, rnp->level, 1138 mask, rnp->qsmask, rnp->level,
1138 rnp->grplo, rnp->grphi, 1139 rnp->grplo, rnp->grphi,
1139 !!rnp->gp_tasks); 1140 !!rnp->gp_tasks);
1140 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 1141 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1141 1142
1142 /* Other bits still set at this level, so done. */ 1143 /* Other bits still set at this level, so done. */
1143 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1144 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1144 return; 1145 return;
1145 } 1146 }
1146 mask = rnp->grpmask; 1147 mask = rnp->grpmask;
1147 if (rnp->parent == NULL) { 1148 if (rnp->parent == NULL) {
1148 1149
1149 /* No more levels. Exit loop holding root lock. */ 1150 /* No more levels. Exit loop holding root lock. */
1150 1151
1151 break; 1152 break;
1152 } 1153 }
1153 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1154 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1154 rnp_c = rnp; 1155 rnp_c = rnp;
1155 rnp = rnp->parent; 1156 rnp = rnp->parent;
1156 raw_spin_lock_irqsave(&rnp->lock, flags); 1157 raw_spin_lock_irqsave(&rnp->lock, flags);
1157 WARN_ON_ONCE(rnp_c->qsmask); 1158 WARN_ON_ONCE(rnp_c->qsmask);
1158 } 1159 }
1159 1160
1160 /* 1161 /*
1161 * Get here if we are the last CPU to pass through a quiescent 1162 * Get here if we are the last CPU to pass through a quiescent
1162 * state for this grace period. Invoke rcu_report_qs_rsp() 1163 * state for this grace period. Invoke rcu_report_qs_rsp()
1163 * to clean up and start the next grace period if one is needed. 1164 * to clean up and start the next grace period if one is needed.
1164 */ 1165 */
1165 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 1166 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
1166 } 1167 }
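
The upward walk above can be distilled into a few lines. The sketch below uses hypothetical user-space types (struct node, report_up) and omits the locking, the tracing, the blocked-readers check, and the final rcu_report_qs_rsp() call at the root; it only shows how clearing the last pending bit at one level propagates this node's own bit up to its parent.

struct node {
	struct node *parent;
	unsigned long qsmask;		/* children still owing a report */
	unsigned long grpmask;		/* this node's bit in parent->qsmask */
};

static void report_up(struct node *np, unsigned long mask)
{
	for (;;) {
		np->qsmask &= ~mask;		/* clear the reporting child's bit */
		if (np->qsmask || !np->parent)
			return;			/* siblings still pending, or at root */
		mask = np->grpmask;		/* propagate: this whole subtree is done */
		np = np->parent;
	}
}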
1167 1168
1168 /* 1169 /*
1169 * Record a quiescent state for the specified CPU to that CPU's rcu_data 1170 * Record a quiescent state for the specified CPU to that CPU's rcu_data
1170 * structure. This must be either called from the specified CPU, or 1171 * structure. This must be either called from the specified CPU, or
1171 * called when the specified CPU is known to be offline (and when it is 1172 * called when the specified CPU is known to be offline (and when it is
1172 * also known that no other CPU is concurrently trying to help the offline 1173 * also known that no other CPU is concurrently trying to help the offline
1173 * CPU). The lastgp argument is used to make sure we are still in the 1174 * CPU). The lastgp argument is used to make sure we are still in the
1174 * grace period of interest. We don't want to end the current grace period 1175 * grace period of interest. We don't want to end the current grace period
1175 * based on quiescent states detected in an earlier grace period! 1176 * based on quiescent states detected in an earlier grace period!
1176 */ 1177 */
1177 static void 1178 static void
1178 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) 1179 rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp)
1179 { 1180 {
1180 unsigned long flags; 1181 unsigned long flags;
1181 unsigned long mask; 1182 unsigned long mask;
1182 struct rcu_node *rnp; 1183 struct rcu_node *rnp;
1183 1184
1184 rnp = rdp->mynode; 1185 rnp = rdp->mynode;
1185 raw_spin_lock_irqsave(&rnp->lock, flags); 1186 raw_spin_lock_irqsave(&rnp->lock, flags);
1186 if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { 1187 if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) {
1187 1188
1188 /* 1189 /*
1189 * The grace period in which this quiescent state was 1190 * The grace period in which this quiescent state was
1190 * recorded has ended, so don't report it upwards. 1191 * recorded has ended, so don't report it upwards.
1191 * We will instead need a new quiescent state that lies 1192 * We will instead need a new quiescent state that lies
1192 * within the current grace period. 1193 * within the current grace period.
1193 */ 1194 */
1194 rdp->passed_quiesce = 0; /* need qs for new gp. */ 1195 rdp->passed_quiesce = 0; /* need qs for new gp. */
1195 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1196 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1196 return; 1197 return;
1197 } 1198 }
1198 mask = rdp->grpmask; 1199 mask = rdp->grpmask;
1199 if ((rnp->qsmask & mask) == 0) { 1200 if ((rnp->qsmask & mask) == 0) {
1200 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1201 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1201 } else { 1202 } else {
1202 rdp->qs_pending = 0; 1203 rdp->qs_pending = 0;
1203 1204
1204 /* 1205 /*
1205 * This GP can't end until cpu checks in, so all of our 1206 * This GP can't end until cpu checks in, so all of our
1206 * callbacks can be processed during the next GP. 1207 * callbacks can be processed during the next GP.
1207 */ 1208 */
1208 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1209 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1209 1210
1210 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ 1211 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
1211 } 1212 }
1212 } 1213 }
1213 1214
1214 /* 1215 /*
1215 * Check to see if there is a new grace period of which this CPU 1216 * Check to see if there is a new grace period of which this CPU
1216 * is not yet aware, and if so, set up local rcu_data state for it. 1217 * is not yet aware, and if so, set up local rcu_data state for it.
1217 * Otherwise, see if this CPU has just passed through its first 1218 * Otherwise, see if this CPU has just passed through its first
1218 * quiescent state for this grace period, and record that fact if so. 1219 * quiescent state for this grace period, and record that fact if so.
1219 */ 1220 */
1220 static void 1221 static void
1221 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 1222 rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1222 { 1223 {
1223 /* If there is now a new grace period, record and return. */ 1224 /* If there is now a new grace period, record and return. */
1224 if (check_for_new_grace_period(rsp, rdp)) 1225 if (check_for_new_grace_period(rsp, rdp))
1225 return; 1226 return;
1226 1227
1227 /* 1228 /*
1228 * Does this CPU still need to do its part for current grace period? 1229 * Does this CPU still need to do its part for current grace period?
1229 * If no, return and let the other CPUs do their part as well. 1230 * If no, return and let the other CPUs do their part as well.
1230 */ 1231 */
1231 if (!rdp->qs_pending) 1232 if (!rdp->qs_pending)
1232 return; 1233 return;
1233 1234
1234 /* 1235 /*
1235 * Was there a quiescent state since the beginning of the grace 1236 * Was there a quiescent state since the beginning of the grace
1236 * period? If no, then exit and wait for the next call. 1237 * period? If no, then exit and wait for the next call.
1237 */ 1238 */
1238 if (!rdp->passed_quiesce) 1239 if (!rdp->passed_quiesce)
1239 return; 1240 return;
1240 1241
1241 /* 1242 /*
1242 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 1243 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1243 * judge of that). 1244 * judge of that).
1244 */ 1245 */
1245 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); 1246 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum);
1246 } 1247 }
1247 1248
1248 #ifdef CONFIG_HOTPLUG_CPU 1249 #ifdef CONFIG_HOTPLUG_CPU
1249 1250
1250 /* 1251 /*
1251 * Move a dying CPU's RCU callbacks to online CPU's callback list. 1252 * Move a dying CPU's RCU callbacks to online CPU's callback list.
1252 * Synchronization is not required because this function executes 1253 * Synchronization is not required because this function executes
1253 * in stop_machine() context. 1254 * in stop_machine() context.
1254 */ 1255 */
1255 static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1256 static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1256 { 1257 {
1257 int i; 1258 int i;
1258 /* current DYING CPU is cleared in the cpu_online_mask */ 1259 /* current DYING CPU is cleared in the cpu_online_mask */
1259 int receive_cpu = cpumask_any(cpu_online_mask); 1260 int receive_cpu = cpumask_any(cpu_online_mask);
1260 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1261 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1261 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); 1262 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
1262 1263
1263 if (rdp->nxtlist == NULL) 1264 if (rdp->nxtlist == NULL)
1264 return; /* irqs disabled, so comparison is stable. */ 1265 return; /* irqs disabled, so comparison is stable. */
1265 1266
1266 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; 1267 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1267 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1268 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1268 receive_rdp->qlen += rdp->qlen; 1269 receive_rdp->qlen += rdp->qlen;
1269 receive_rdp->n_cbs_adopted += rdp->qlen; 1270 receive_rdp->n_cbs_adopted += rdp->qlen;
1270 rdp->n_cbs_orphaned += rdp->qlen; 1271 rdp->n_cbs_orphaned += rdp->qlen;
1271 1272
1272 rdp->nxtlist = NULL; 1273 rdp->nxtlist = NULL;
1273 for (i = 0; i < RCU_NEXT_SIZE; i++) 1274 for (i = 0; i < RCU_NEXT_SIZE; i++)
1274 rdp->nxttail[i] = &rdp->nxtlist; 1275 rdp->nxttail[i] = &rdp->nxtlist;
1275 rdp->qlen = 0; 1276 rdp->qlen = 0;
1276 } 1277 }
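
The two-assignment splice above is easy to miss on a first reading. Here is a minimal user-space sketch, with hypothetical types (struct cb, struct cblist, cblist_adopt), of the same tail-pointer idiom: because ->tail always points at the final ->next field (or at &head when the list is empty), an entire donor list can be adopted in O(1), much as the dying CPU's nxtlist is appended at the receiving CPU's RCU_NEXT_TAIL above.

#include <stddef.h>

struct cb {
	struct cb *next;
};

struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the last ->next, or at &head when empty */
};

static void cblist_adopt(struct cblist *dst, struct cblist *src)
{
	if (src->head == NULL)
		return;			/* nothing to adopt */
	*dst->tail = src->head;		/* hook the donor list onto the adopter's tail */
	dst->tail = src->tail;		/* the adopter's tail is now the donor's tail */
	src->head = NULL;		/* and the donor list is reset to empty */
	src->tail = &src->head;
}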
1277 1278
1278 /* 1279 /*
1279 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1280 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1280 * and move all callbacks from the outgoing CPU to the current one. 1281 * and move all callbacks from the outgoing CPU to the current one.
1281 * There can only be one CPU hotplug operation at a time, so no other 1282 * There can only be one CPU hotplug operation at a time, so no other
1282 * CPU can be attempting to update rcu_cpu_kthread_task. 1283 * CPU can be attempting to update rcu_cpu_kthread_task.
1283 */ 1284 */
1284 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1285 static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1285 { 1286 {
1286 unsigned long flags; 1287 unsigned long flags;
1287 unsigned long mask; 1288 unsigned long mask;
1288 int need_report = 0; 1289 int need_report = 0;
1289 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1290 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1290 struct rcu_node *rnp; 1291 struct rcu_node *rnp;
1291 1292
1292 rcu_stop_cpu_kthread(cpu); 1293 rcu_stop_cpu_kthread(cpu);
1293 1294
1294 /* Exclude any attempts to start a new grace period. */ 1295 /* Exclude any attempts to start a new grace period. */
1295 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1296 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1296 1297
1297 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 1298 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1298 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 1299 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
1299 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1300 mask = rdp->grpmask; /* rnp->grplo is constant. */
1300 do { 1301 do {
1301 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1302 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1302 rnp->qsmaskinit &= ~mask; 1303 rnp->qsmaskinit &= ~mask;
1303 if (rnp->qsmaskinit != 0) { 1304 if (rnp->qsmaskinit != 0) {
1304 if (rnp != rdp->mynode) 1305 if (rnp != rdp->mynode)
1305 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1306 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1306 else 1307 else
1307 trace_rcu_grace_period(rsp->name, 1308 trace_rcu_grace_period(rsp->name,
1308 rnp->gpnum + 1 - 1309 rnp->gpnum + 1 -
1309 !!(rnp->qsmask & mask), 1310 !!(rnp->qsmask & mask),
1310 "cpuofl"); 1311 "cpuofl");
1311 break; 1312 break;
1312 } 1313 }
1313 if (rnp == rdp->mynode) { 1314 if (rnp == rdp->mynode) {
1314 trace_rcu_grace_period(rsp->name, 1315 trace_rcu_grace_period(rsp->name,
1315 rnp->gpnum + 1 - 1316 rnp->gpnum + 1 -
1316 !!(rnp->qsmask & mask), 1317 !!(rnp->qsmask & mask),
1317 "cpuofl"); 1318 "cpuofl");
1318 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1319 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1319 } else 1320 } else
1320 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1321 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1321 mask = rnp->grpmask; 1322 mask = rnp->grpmask;
1322 rnp = rnp->parent; 1323 rnp = rnp->parent;
1323 } while (rnp != NULL); 1324 } while (rnp != NULL);
1324 1325
1325 /* 1326 /*
1326 * We still hold the leaf rcu_node structure lock here, and 1327 * We still hold the leaf rcu_node structure lock here, and
1327 * irqs are still disabled. The reason for this subterfuge is 1328 * irqs are still disabled. The reason for this subterfuge is
1328 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock 1329 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
1329 * held leads to deadlock. 1330 * held leads to deadlock.
1330 */ 1331 */
1331 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 1332 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1332 rnp = rdp->mynode; 1333 rnp = rdp->mynode;
1333 if (need_report & RCU_OFL_TASKS_NORM_GP) 1334 if (need_report & RCU_OFL_TASKS_NORM_GP)
1334 rcu_report_unblock_qs_rnp(rnp, flags); 1335 rcu_report_unblock_qs_rnp(rnp, flags);
1335 else 1336 else
1336 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1337 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1337 if (need_report & RCU_OFL_TASKS_EXP_GP) 1338 if (need_report & RCU_OFL_TASKS_EXP_GP)
1338 rcu_report_exp_rnp(rsp, rnp, true); 1339 rcu_report_exp_rnp(rsp, rnp, true);
1339 rcu_node_kthread_setaffinity(rnp, -1); 1340 rcu_node_kthread_setaffinity(rnp, -1);
1340 } 1341 }
1341 1342
1342 /* 1343 /*
1343 * Remove the specified CPU from the RCU hierarchy and move any pending 1344 * Remove the specified CPU from the RCU hierarchy and move any pending
1344 * callbacks that it might have to the current CPU. This code assumes 1345 * callbacks that it might have to the current CPU. This code assumes
1345 * that at least one CPU in the system will remain running at all times. 1346 * that at least one CPU in the system will remain running at all times.
1346 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. 1347 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
1347 */ 1348 */
1348 static void rcu_offline_cpu(int cpu) 1349 static void rcu_offline_cpu(int cpu)
1349 { 1350 {
1350 __rcu_offline_cpu(cpu, &rcu_sched_state); 1351 __rcu_offline_cpu(cpu, &rcu_sched_state);
1351 __rcu_offline_cpu(cpu, &rcu_bh_state); 1352 __rcu_offline_cpu(cpu, &rcu_bh_state);
1352 rcu_preempt_offline_cpu(cpu); 1353 rcu_preempt_offline_cpu(cpu);
1353 } 1354 }
1354 1355
1355 #else /* #ifdef CONFIG_HOTPLUG_CPU */ 1356 #else /* #ifdef CONFIG_HOTPLUG_CPU */
1356 1357
1357 static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1358 static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1358 { 1359 {
1359 } 1360 }
1360 1361
1361 static void rcu_offline_cpu(int cpu) 1362 static void rcu_offline_cpu(int cpu)
1362 { 1363 {
1363 } 1364 }
1364 1365
1365 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ 1366 #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1366 1367
1367 /* 1368 /*
1368 * Invoke any RCU callbacks that have made it to the end of their grace 1369 * Invoke any RCU callbacks that have made it to the end of their grace
1369 * period. Throttle as specified by rdp->blimit. 1370 * period. Throttle as specified by rdp->blimit.
1370 */ 1371 */
1371 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 1372 static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1372 { 1373 {
1373 unsigned long flags; 1374 unsigned long flags;
1374 struct rcu_head *next, *list, **tail; 1375 struct rcu_head *next, *list, **tail;
1375 int bl, count; 1376 int bl, count;
1376 1377
1377 /* If no callbacks are ready, just return.*/ 1378 /* If no callbacks are ready, just return.*/
1378 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1379 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1379 trace_rcu_batch_start(rsp->name, 0, 0); 1380 trace_rcu_batch_start(rsp->name, 0, 0);
1380 trace_rcu_batch_end(rsp->name, 0); 1381 trace_rcu_batch_end(rsp->name, 0);
1381 return; 1382 return;
1382 } 1383 }
1383 1384
1384 /* 1385 /*
1385 * Extract the list of ready callbacks, disabling to prevent 1386 * Extract the list of ready callbacks, disabling to prevent
1386 * races with call_rcu() from interrupt handlers. 1387 * races with call_rcu() from interrupt handlers.
1387 */ 1388 */
1388 local_irq_save(flags); 1389 local_irq_save(flags);
1389 bl = rdp->blimit; 1390 bl = rdp->blimit;
1390 trace_rcu_batch_start(rsp->name, rdp->qlen, bl); 1391 trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
1391 list = rdp->nxtlist; 1392 list = rdp->nxtlist;
1392 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1393 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1393 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1394 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1394 tail = rdp->nxttail[RCU_DONE_TAIL]; 1395 tail = rdp->nxttail[RCU_DONE_TAIL];
1395 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) 1396 for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
1396 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) 1397 if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
1397 rdp->nxttail[count] = &rdp->nxtlist; 1398 rdp->nxttail[count] = &rdp->nxtlist;
1398 local_irq_restore(flags); 1399 local_irq_restore(flags);
1399 1400
1400 /* Invoke callbacks. */ 1401 /* Invoke callbacks. */
1401 count = 0; 1402 count = 0;
1402 while (list) { 1403 while (list) {
1403 next = list->next; 1404 next = list->next;
1404 prefetch(next); 1405 prefetch(next);
1405 debug_rcu_head_unqueue(list); 1406 debug_rcu_head_unqueue(list);
1406 __rcu_reclaim(rsp->name, list); 1407 __rcu_reclaim(rsp->name, list);
1407 list = next; 1408 list = next;
1408 if (++count >= bl) 1409 if (++count >= bl)
1409 break; 1410 break;
1410 } 1411 }
1411 1412
1412 local_irq_save(flags); 1413 local_irq_save(flags);
1413 trace_rcu_batch_end(rsp->name, count); 1414 trace_rcu_batch_end(rsp->name, count);
1414 1415
1415 /* Update count, and requeue any remaining callbacks. */ 1416 /* Update count, and requeue any remaining callbacks. */
1416 rdp->qlen -= count; 1417 rdp->qlen -= count;
1417 rdp->n_cbs_invoked += count; 1418 rdp->n_cbs_invoked += count;
1418 if (list != NULL) { 1419 if (list != NULL) {
1419 *tail = rdp->nxtlist; 1420 *tail = rdp->nxtlist;
1420 rdp->nxtlist = list; 1421 rdp->nxtlist = list;
1421 for (count = 0; count < RCU_NEXT_SIZE; count++) 1422 for (count = 0; count < RCU_NEXT_SIZE; count++)
1422 if (&rdp->nxtlist == rdp->nxttail[count]) 1423 if (&rdp->nxtlist == rdp->nxttail[count])
1423 rdp->nxttail[count] = tail; 1424 rdp->nxttail[count] = tail;
1424 else 1425 else
1425 break; 1426 break;
1426 } 1427 }
1427 1428
1428 /* Reinstate batch limit if we have worked down the excess. */ 1429 /* Reinstate batch limit if we have worked down the excess. */
1429 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 1430 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1430 rdp->blimit = blimit; 1431 rdp->blimit = blimit;
1431 1432
1432 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 1433 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
1433 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 1434 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
1434 rdp->qlen_last_fqs_check = 0; 1435 rdp->qlen_last_fqs_check = 0;
1435 rdp->n_force_qs_snap = rsp->n_force_qs; 1436 rdp->n_force_qs_snap = rsp->n_force_qs;
1436 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 1437 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1437 rdp->qlen_last_fqs_check = rdp->qlen; 1438 rdp->qlen_last_fqs_check = rdp->qlen;
1438 1439
1439 local_irq_restore(flags); 1440 local_irq_restore(flags);
1440 1441
1441 /* Re-invoke RCU core processing if there are callbacks remaining. */ 1442 /* Re-invoke RCU core processing if there are callbacks remaining. */
1442 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1443 if (cpu_has_callbacks_ready_to_invoke(rdp))
1443 invoke_rcu_core(); 1444 invoke_rcu_core();
1444 } 1445 }
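
The throttling logic in rcu_do_batch() boils down to: detach the ready callbacks, invoke at most rdp->blimit of them, and put whatever is left back at the front of the queue. A stripped-down, single-threaded sketch with hypothetical names (struct cb, ready, do_batch) follows; the real function additionally disables interrupts around the list surgery and keeps the segmented nxttail[] pointers consistent.

#include <stddef.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

static struct cb *ready;	/* callbacks whose grace period has ended */

static void do_batch(int bl)
{
	struct cb *list = ready, *next;
	int count = 0;

	ready = NULL;				/* detach the whole ready list */
	while (list) {
		next = list->next;
		list->func(list);		/* invoke one callback */
		list = next;
		if (++count >= bl)
			break;			/* throttle: stop after bl callbacks */
	}
	if (list) {				/* requeue leftovers at the front, */
		struct cb **tail = &list;	/* ahead of anything queued meanwhile */

		while (*tail)
			tail = &(*tail)->next;
		*tail = ready;
		ready = list;
	}
}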
1445 1446
1446 /* 1447 /*
1447 * Check to see if this CPU is in a non-context-switch quiescent state 1448 * Check to see if this CPU is in a non-context-switch quiescent state
1448 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1449 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1449 * Also schedule RCU core processing. 1450 * Also schedule RCU core processing.
1450 * 1451 *
1451 * This function must be called from hardirq context. It is normally 1452 * This function must be called from hardirq context. It is normally
1452 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1453 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1453 * false, there is no point in invoking rcu_check_callbacks(). 1454 * false, there is no point in invoking rcu_check_callbacks().
1454 */ 1455 */
1455 void rcu_check_callbacks(int cpu, int user) 1456 void rcu_check_callbacks(int cpu, int user)
1456 { 1457 {
1457 trace_rcu_utilization("Start scheduler-tick"); 1458 trace_rcu_utilization("Start scheduler-tick");
1458 if (user || rcu_is_cpu_rrupt_from_idle()) { 1459 if (user || rcu_is_cpu_rrupt_from_idle()) {
1459 1460
1460 /* 1461 /*
1461 * Get here if this CPU took its interrupt from user 1462 * Get here if this CPU took its interrupt from user
1462 * mode or from the idle loop, and if this is not a 1463 * mode or from the idle loop, and if this is not a
1463 * nested interrupt. In this case, the CPU is in 1464 * nested interrupt. In this case, the CPU is in
1464 * a quiescent state, so note it. 1465 * a quiescent state, so note it.
1465 * 1466 *
1466 * No memory barrier is required here because both 1467 * No memory barrier is required here because both
1467 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local 1468 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
1468 * variables that other CPUs neither access nor modify, 1469 * variables that other CPUs neither access nor modify,
1469 * at least not while the corresponding CPU is online. 1470 * at least not while the corresponding CPU is online.
1470 */ 1471 */
1471 1472
1472 rcu_sched_qs(cpu); 1473 rcu_sched_qs(cpu);
1473 rcu_bh_qs(cpu); 1474 rcu_bh_qs(cpu);
1474 1475
1475 } else if (!in_softirq()) { 1476 } else if (!in_softirq()) {
1476 1477
1477 /* 1478 /*
1478 * Get here if this CPU did not take its interrupt from 1479 * Get here if this CPU did not take its interrupt from
1479 * softirq, in other words, if it is not interrupting 1480 * softirq, in other words, if it is not interrupting
1480 * a rcu_bh read-side critical section. This is an _bh 1481 * a rcu_bh read-side critical section. This is an _bh
1481 * critical section, so note it. 1482 * critical section, so note it.
1482 */ 1483 */
1483 1484
1484 rcu_bh_qs(cpu); 1485 rcu_bh_qs(cpu);
1485 } 1486 }
1486 rcu_preempt_check_callbacks(cpu); 1487 rcu_preempt_check_callbacks(cpu);
1487 if (rcu_pending(cpu)) 1488 if (rcu_pending(cpu))
1488 invoke_rcu_core(); 1489 invoke_rcu_core();
1489 trace_rcu_utilization("End scheduler-tick"); 1490 trace_rcu_utilization("End scheduler-tick");
1490 } 1491 }
1491 1492
1492 #ifdef CONFIG_SMP 1493 #ifdef CONFIG_SMP
1493 1494
1494 /* 1495 /*
1495 * Scan the leaf rcu_node structures, processing dyntick state for any that 1496 * Scan the leaf rcu_node structures, processing dyntick state for any that
1496 * have not yet encountered a quiescent state, using the function specified. 1497 * have not yet encountered a quiescent state, using the function specified.
1497 * Also initiate boosting for any threads blocked on the root rcu_node. 1498 * Also initiate boosting for any threads blocked on the root rcu_node.
1498 * 1499 *
1499 * The caller must have suppressed start of new grace periods. 1500 * The caller must have suppressed start of new grace periods.
1500 */ 1501 */
1501 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) 1502 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1502 { 1503 {
1503 unsigned long bit; 1504 unsigned long bit;
1504 int cpu; 1505 int cpu;
1505 unsigned long flags; 1506 unsigned long flags;
1506 unsigned long mask; 1507 unsigned long mask;
1507 struct rcu_node *rnp; 1508 struct rcu_node *rnp;
1508 1509
1509 rcu_for_each_leaf_node(rsp, rnp) { 1510 rcu_for_each_leaf_node(rsp, rnp) {
1510 mask = 0; 1511 mask = 0;
1511 raw_spin_lock_irqsave(&rnp->lock, flags); 1512 raw_spin_lock_irqsave(&rnp->lock, flags);
1512 if (!rcu_gp_in_progress(rsp)) { 1513 if (!rcu_gp_in_progress(rsp)) {
1513 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1514 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1514 return; 1515 return;
1515 } 1516 }
1516 if (rnp->qsmask == 0) { 1517 if (rnp->qsmask == 0) {
1517 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 1518 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
1518 continue; 1519 continue;
1519 } 1520 }
1520 cpu = rnp->grplo; 1521 cpu = rnp->grplo;
1521 bit = 1; 1522 bit = 1;
1522 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 1523 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
1523 if ((rnp->qsmask & bit) != 0 && 1524 if ((rnp->qsmask & bit) != 0 &&
1524 f(per_cpu_ptr(rsp->rda, cpu))) 1525 f(per_cpu_ptr(rsp->rda, cpu)))
1525 mask |= bit; 1526 mask |= bit;
1526 } 1527 }
1527 if (mask != 0) { 1528 if (mask != 0) {
1528 1529
1529 /* rcu_report_qs_rnp() releases rnp->lock. */ 1530 /* rcu_report_qs_rnp() releases rnp->lock. */
1530 rcu_report_qs_rnp(mask, rsp, rnp, flags); 1531 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1531 continue; 1532 continue;
1532 } 1533 }
1533 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1534 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1534 } 1535 }
1535 rnp = rcu_get_root(rsp); 1536 rnp = rcu_get_root(rsp);
1536 if (rnp->qsmask == 0) { 1537 if (rnp->qsmask == 0) {
1537 raw_spin_lock_irqsave(&rnp->lock, flags); 1538 raw_spin_lock_irqsave(&rnp->lock, flags);
1538 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1539 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1539 } 1540 }
1540 } 1541 }
1541 1542
1542 /* 1543 /*
1543 * Force quiescent states on reluctant CPUs, and also detect which 1544 * Force quiescent states on reluctant CPUs, and also detect which
1544 * CPUs are in dyntick-idle mode. 1545 * CPUs are in dyntick-idle mode.
1545 */ 1546 */
1546 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1547 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1547 { 1548 {
1548 unsigned long flags; 1549 unsigned long flags;
1549 struct rcu_node *rnp = rcu_get_root(rsp); 1550 struct rcu_node *rnp = rcu_get_root(rsp);
1550 1551
1551 trace_rcu_utilization("Start fqs"); 1552 trace_rcu_utilization("Start fqs");
1552 if (!rcu_gp_in_progress(rsp)) { 1553 if (!rcu_gp_in_progress(rsp)) {
1553 trace_rcu_utilization("End fqs"); 1554 trace_rcu_utilization("End fqs");
1554 return; /* No grace period in progress, nothing to force. */ 1555 return; /* No grace period in progress, nothing to force. */
1555 } 1556 }
1556 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { 1557 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1557 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1558 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1558 trace_rcu_utilization("End fqs"); 1559 trace_rcu_utilization("End fqs");
1559 return; /* Someone else is already on the job. */ 1560 return; /* Someone else is already on the job. */
1560 } 1561 }
1561 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) 1562 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
1562 goto unlock_fqs_ret; /* no emergency and done recently. */ 1563 goto unlock_fqs_ret; /* no emergency and done recently. */
1563 rsp->n_force_qs++; 1564 rsp->n_force_qs++;
1564 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1565 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1565 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1566 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1566 if (!rcu_gp_in_progress(rsp)) { 1567 if (!rcu_gp_in_progress(rsp)) {
1567 rsp->n_force_qs_ngp++; 1568 rsp->n_force_qs_ngp++;
1568 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1569 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1569 goto unlock_fqs_ret; /* no GP in progress, time updated. */ 1570 goto unlock_fqs_ret; /* no GP in progress, time updated. */
1570 } 1571 }
1571 rsp->fqs_active = 1; 1572 rsp->fqs_active = 1;
1572 switch (rsp->fqs_state) { 1573 switch (rsp->fqs_state) {
1573 case RCU_GP_IDLE: 1574 case RCU_GP_IDLE:
1574 case RCU_GP_INIT: 1575 case RCU_GP_INIT:
1575 1576
1576 break; /* grace period idle or initializing, ignore. */ 1577 break; /* grace period idle or initializing, ignore. */
1577 1578
1578 case RCU_SAVE_DYNTICK: 1579 case RCU_SAVE_DYNTICK:
1579 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) 1580 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1580 break; /* So gcc recognizes the dead code. */ 1581 break; /* So gcc recognizes the dead code. */
1581 1582
1582 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1583 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1583 1584
1584 /* Record dyntick-idle state. */ 1585 /* Record dyntick-idle state. */
1585 force_qs_rnp(rsp, dyntick_save_progress_counter); 1586 force_qs_rnp(rsp, dyntick_save_progress_counter);
1586 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1587 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1587 if (rcu_gp_in_progress(rsp)) 1588 if (rcu_gp_in_progress(rsp))
1588 rsp->fqs_state = RCU_FORCE_QS; 1589 rsp->fqs_state = RCU_FORCE_QS;
1589 break; 1590 break;
1590 1591
1591 case RCU_FORCE_QS: 1592 case RCU_FORCE_QS:
1592 1593
1593 /* Check dyntick-idle state, send IPI to laggards. */ 1594 /* Check dyntick-idle state, send IPI to laggards. */
1594 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1595 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1595 force_qs_rnp(rsp, rcu_implicit_dynticks_qs); 1596 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
1596 1597
1597 /* Leave state in case more forcing is required. */ 1598 /* Leave state in case more forcing is required. */
1598 1599
1599 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 1600 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1600 break; 1601 break;
1601 } 1602 }
1602 rsp->fqs_active = 0; 1603 rsp->fqs_active = 0;
1603 if (rsp->fqs_need_gp) { 1604 if (rsp->fqs_need_gp) {
1604 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ 1605 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1605 rsp->fqs_need_gp = 0; 1606 rsp->fqs_need_gp = 0;
1606 rcu_start_gp(rsp, flags); /* releases rnp->lock */ 1607 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1607 trace_rcu_utilization("End fqs"); 1608 trace_rcu_utilization("End fqs");
1608 return; 1609 return;
1609 } 1610 }
1610 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1611 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1611 unlock_fqs_ret: 1612 unlock_fqs_ret:
1612 raw_spin_unlock_irqrestore(&rsp->fqslock, flags); 1613 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1613 trace_rcu_utilization("End fqs"); 1614 trace_rcu_utilization("End fqs");
1614 } 1615 }
1615 1616
1616 #else /* #ifdef CONFIG_SMP */ 1617 #else /* #ifdef CONFIG_SMP */
1617 1618
1618 static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1619 static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1619 { 1620 {
1620 set_need_resched(); 1621 set_need_resched();
1621 } 1622 }
1622 1623
1623 #endif /* #else #ifdef CONFIG_SMP */ 1624 #endif /* #else #ifdef CONFIG_SMP */
1624 1625
1625 /* 1626 /*
1626 * This does the RCU core processing work for the specified rcu_state 1627 * This does the RCU core processing work for the specified rcu_state
1627 * and rcu_data structures. This may be called only from the CPU to 1628 * and rcu_data structures. This may be called only from the CPU to
1628 * whom the rdp belongs. 1629 * whom the rdp belongs.
1629 */ 1630 */
1630 static void 1631 static void
1631 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1632 __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1632 { 1633 {
1633 unsigned long flags; 1634 unsigned long flags;
1634 1635
1635 WARN_ON_ONCE(rdp->beenonline == 0); 1636 WARN_ON_ONCE(rdp->beenonline == 0);
1636 1637
1637 /* 1638 /*
1638 * If an RCU GP has gone long enough, go check for dyntick 1639 * If an RCU GP has gone long enough, go check for dyntick
1639 * idle CPUs and, if needed, send resched IPIs. 1640 * idle CPUs and, if needed, send resched IPIs.
1640 */ 1641 */
1641 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1642 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1642 force_quiescent_state(rsp, 1); 1643 force_quiescent_state(rsp, 1);
1643 1644
1644 /* 1645 /*
1645 * Advance callbacks in response to end of earlier grace 1646 * Advance callbacks in response to end of earlier grace
1646 * period that some other CPU ended. 1647 * period that some other CPU ended.
1647 */ 1648 */
1648 rcu_process_gp_end(rsp, rdp); 1649 rcu_process_gp_end(rsp, rdp);
1649 1650
1650 /* Update RCU state based on any recent quiescent states. */ 1651 /* Update RCU state based on any recent quiescent states. */
1651 rcu_check_quiescent_state(rsp, rdp); 1652 rcu_check_quiescent_state(rsp, rdp);
1652 1653
1653 /* Does this CPU require a not-yet-started grace period? */ 1654 /* Does this CPU require a not-yet-started grace period? */
1654 if (cpu_needs_another_gp(rsp, rdp)) { 1655 if (cpu_needs_another_gp(rsp, rdp)) {
1655 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 1656 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
1656 rcu_start_gp(rsp, flags); /* releases above lock */ 1657 rcu_start_gp(rsp, flags); /* releases above lock */
1657 } 1658 }
1658 1659
1659 /* If there are callbacks ready, invoke them. */ 1660 /* If there are callbacks ready, invoke them. */
1660 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1661 if (cpu_has_callbacks_ready_to_invoke(rdp))
1661 invoke_rcu_callbacks(rsp, rdp); 1662 invoke_rcu_callbacks(rsp, rdp);
1662 } 1663 }
1663 1664
1664 /* 1665 /*
1665 * Do RCU core processing for the current CPU. 1666 * Do RCU core processing for the current CPU.
1666 */ 1667 */
1667 static void rcu_process_callbacks(struct softirq_action *unused) 1668 static void rcu_process_callbacks(struct softirq_action *unused)
1668 { 1669 {
1669 trace_rcu_utilization("Start RCU core"); 1670 trace_rcu_utilization("Start RCU core");
1670 __rcu_process_callbacks(&rcu_sched_state, 1671 __rcu_process_callbacks(&rcu_sched_state,
1671 &__get_cpu_var(rcu_sched_data)); 1672 &__get_cpu_var(rcu_sched_data));
1672 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1673 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1673 rcu_preempt_process_callbacks(); 1674 rcu_preempt_process_callbacks();
1674 trace_rcu_utilization("End RCU core"); 1675 trace_rcu_utilization("End RCU core");
1675 } 1676 }
1676 1677
1677 /* 1678 /*
1678 * Schedule RCU callback invocation. If the specified type of RCU 1679 * Schedule RCU callback invocation. If the specified type of RCU
1679 * does not support RCU priority boosting, just do a direct call, 1680 * does not support RCU priority boosting, just do a direct call,
1680 * otherwise wake up the per-CPU kernel kthread. Note that because we 1681 * otherwise wake up the per-CPU kernel kthread. Note that because we
1681 * are running on the current CPU with interrupts disabled, the 1682 * are running on the current CPU with interrupts disabled, the
1682 * rcu_cpu_kthread_task cannot disappear out from under us. 1683 * rcu_cpu_kthread_task cannot disappear out from under us.
1683 */ 1684 */
1684 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1685 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1685 { 1686 {
1686 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) 1687 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
1687 return; 1688 return;
1688 if (likely(!rsp->boost)) { 1689 if (likely(!rsp->boost)) {
1689 rcu_do_batch(rsp, rdp); 1690 rcu_do_batch(rsp, rdp);
1690 return; 1691 return;
1691 } 1692 }
1692 invoke_rcu_callbacks_kthread(); 1693 invoke_rcu_callbacks_kthread();
1693 } 1694 }
1694 1695
1695 static void invoke_rcu_core(void) 1696 static void invoke_rcu_core(void)
1696 { 1697 {
1697 raise_softirq(RCU_SOFTIRQ); 1698 raise_softirq(RCU_SOFTIRQ);
1698 } 1699 }
1699 1700
1700 static void 1701 static void
1701 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1702 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1702 struct rcu_state *rsp) 1703 struct rcu_state *rsp)
1703 { 1704 {
1704 unsigned long flags; 1705 unsigned long flags;
1705 struct rcu_data *rdp; 1706 struct rcu_data *rdp;
1706 1707
1707 debug_rcu_head_queue(head); 1708 debug_rcu_head_queue(head);
1708 head->func = func; 1709 head->func = func;
1709 head->next = NULL; 1710 head->next = NULL;
1710 1711
1711 smp_mb(); /* Ensure RCU update seen before callback registry. */ 1712 smp_mb(); /* Ensure RCU update seen before callback registry. */
1712 1713
1713 /* 1714 /*
1714 * Opportunistically note grace-period endings and beginnings. 1715 * Opportunistically note grace-period endings and beginnings.
1715 * Note that we might see a beginning right after we see an 1716 * Note that we might see a beginning right after we see an
1716 * end, but never vice versa, since this CPU has to pass through 1717 * end, but never vice versa, since this CPU has to pass through
1717 * a quiescent state betweentimes. 1718 * a quiescent state betweentimes.
1718 */ 1719 */
1719 local_irq_save(flags); 1720 local_irq_save(flags);
1720 rdp = this_cpu_ptr(rsp->rda); 1721 rdp = this_cpu_ptr(rsp->rda);
1721 1722
1722 /* Add the callback to our list. */ 1723 /* Add the callback to our list. */
1723 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1724 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1724 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1725 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1725 rdp->qlen++; 1726 rdp->qlen++;
1726 1727
1727 if (__is_kfree_rcu_offset((unsigned long)func)) 1728 if (__is_kfree_rcu_offset((unsigned long)func))
1728 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 1729 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
1729 rdp->qlen); 1730 rdp->qlen);
1730 else 1731 else
1731 trace_rcu_callback(rsp->name, head, rdp->qlen); 1732 trace_rcu_callback(rsp->name, head, rdp->qlen);
1732 1733
1733 /* If interrupts were disabled, don't dive into RCU core. */ 1734 /* If interrupts were disabled, don't dive into RCU core. */
1734 if (irqs_disabled_flags(flags)) { 1735 if (irqs_disabled_flags(flags)) {
1735 local_irq_restore(flags); 1736 local_irq_restore(flags);
1736 return; 1737 return;
1737 } 1738 }
1738 1739
1739 /* 1740 /*
1740 * Force the grace period if too many callbacks or too long waiting. 1741 * Force the grace period if too many callbacks or too long waiting.
1741 * Enforce hysteresis, and don't invoke force_quiescent_state() 1742 * Enforce hysteresis, and don't invoke force_quiescent_state()
1742 * if some other CPU has recently done so. Also, don't bother 1743 * if some other CPU has recently done so. Also, don't bother
1743 * invoking force_quiescent_state() if the newly enqueued callback 1744 * invoking force_quiescent_state() if the newly enqueued callback
1744 * is the only one waiting for a grace period to complete. 1745 * is the only one waiting for a grace period to complete.
1745 */ 1746 */
1746 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 1747 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1747 1748
1748 /* Are we ignoring a completed grace period? */ 1749 /* Are we ignoring a completed grace period? */
1749 rcu_process_gp_end(rsp, rdp); 1750 rcu_process_gp_end(rsp, rdp);
1750 check_for_new_grace_period(rsp, rdp); 1751 check_for_new_grace_period(rsp, rdp);
1751 1752
1752 /* Start a new grace period if one not already started. */ 1753 /* Start a new grace period if one not already started. */
1753 if (!rcu_gp_in_progress(rsp)) { 1754 if (!rcu_gp_in_progress(rsp)) {
1754 unsigned long nestflag; 1755 unsigned long nestflag;
1755 struct rcu_node *rnp_root = rcu_get_root(rsp); 1756 struct rcu_node *rnp_root = rcu_get_root(rsp);
1756 1757
1757 raw_spin_lock_irqsave(&rnp_root->lock, nestflag); 1758 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1758 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */ 1759 rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
1759 } else { 1760 } else {
1760 /* Give the grace period a kick. */ 1761 /* Give the grace period a kick. */
1761 rdp->blimit = LONG_MAX; 1762 rdp->blimit = LONG_MAX;
1762 if (rsp->n_force_qs == rdp->n_force_qs_snap && 1763 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1763 *rdp->nxttail[RCU_DONE_TAIL] != head) 1764 *rdp->nxttail[RCU_DONE_TAIL] != head)
1764 force_quiescent_state(rsp, 0); 1765 force_quiescent_state(rsp, 0);
1765 rdp->n_force_qs_snap = rsp->n_force_qs; 1766 rdp->n_force_qs_snap = rsp->n_force_qs;
1766 rdp->qlen_last_fqs_check = rdp->qlen; 1767 rdp->qlen_last_fqs_check = rdp->qlen;
1767 } 1768 }
1768 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) 1769 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1769 force_quiescent_state(rsp, 1); 1770 force_quiescent_state(rsp, 1);
1770 local_irq_restore(flags); 1771 local_irq_restore(flags);
1771 } 1772 }
1772 1773
1773 /* 1774 /*
1774 * Queue an RCU-sched callback for invocation after a grace period. 1775 * Queue an RCU-sched callback for invocation after a grace period.
1775 */ 1776 */
1776 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1777 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1777 { 1778 {
1778 __call_rcu(head, func, &rcu_sched_state); 1779 __call_rcu(head, func, &rcu_sched_state);
1779 } 1780 }
1780 EXPORT_SYMBOL_GPL(call_rcu_sched); 1781 EXPORT_SYMBOL_GPL(call_rcu_sched);
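
For reference, a typical (hypothetical) caller of call_rcu_sched() embeds the rcu_head in its own structure and uses container_of() in the callback to recover and free the object once the grace period has elapsed:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct bar {
	int data;
	struct rcu_head rcu;	/* embedded so the callback can find us */
};

static void bar_free_rcu(struct rcu_head *head)
{
	struct bar *b = container_of(head, struct bar, rcu);

	kfree(b);		/* safe: a full grace period has elapsed */
}

static void bar_release(struct bar *b)
{
	call_rcu_sched(&b->rcu, bar_free_rcu);
}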
1781 1782
1782 /* 1783 /*
1783 * Queue an RCU callback for invocation after a quicker grace period. 1784 * Queue an RCU callback for invocation after a quicker grace period.
1784 */ 1785 */
1785 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1786 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1786 { 1787 {
1787 __call_rcu(head, func, &rcu_bh_state); 1788 __call_rcu(head, func, &rcu_bh_state);
1788 } 1789 }
1789 EXPORT_SYMBOL_GPL(call_rcu_bh); 1790 EXPORT_SYMBOL_GPL(call_rcu_bh);
1790 1791
1791 /** 1792 /**
1792 * synchronize_sched - wait until an rcu-sched grace period has elapsed. 1793 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1793 * 1794 *
1794 * Control will return to the caller some time after a full rcu-sched 1795 * Control will return to the caller some time after a full rcu-sched
1795 * grace period has elapsed, in other words after all currently executing 1796 * grace period has elapsed, in other words after all currently executing
1796 * rcu-sched read-side critical sections have completed. These read-side 1797 * rcu-sched read-side critical sections have completed. These read-side
1797 * critical sections are delimited by rcu_read_lock_sched() and 1798 * critical sections are delimited by rcu_read_lock_sched() and
1798 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), 1799 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
1799 * local_irq_disable(), and so on may be used in place of 1800 * local_irq_disable(), and so on may be used in place of
1800 * rcu_read_lock_sched(). 1801 * rcu_read_lock_sched().
1801 * 1802 *
1802 * This means that all preempt_disable code sequences, including NMI and 1803 * This means that all preempt_disable code sequences, including NMI and
1803 * hardware-interrupt handlers, in progress on entry will have completed 1804 * hardware-interrupt handlers, in progress on entry will have completed
1804 * before this primitive returns. However, this does not guarantee that 1805 * before this primitive returns. However, this does not guarantee that
1805 * softirq handlers will have completed, since in some kernels, these 1806 * softirq handlers will have completed, since in some kernels, these
1806 * handlers can run in process context, and can block. 1807 * handlers can run in process context, and can block.
1807 * 1808 *
1808 * This primitive provides the guarantees made by the (now removed) 1809 * This primitive provides the guarantees made by the (now removed)
1809 * synchronize_kernel() API. In contrast, synchronize_rcu() only 1810 * synchronize_kernel() API. In contrast, synchronize_rcu() only
1810 * guarantees that rcu_read_lock() sections will have completed. 1811 * guarantees that rcu_read_lock() sections will have completed.
1811 * In "classic RCU", these two guarantees happen to be one and 1812 * In "classic RCU", these two guarantees happen to be one and
1812 * the same, but can differ in realtime RCU implementations. 1813 * the same, but can differ in realtime RCU implementations.
1813 */ 1814 */
1814 void synchronize_sched(void) 1815 void synchronize_sched(void)
1815 { 1816 {
1816 if (rcu_blocking_is_gp()) 1817 if (rcu_blocking_is_gp())
1817 return; 1818 return;
1818 wait_rcu_gp(call_rcu_sched); 1819 wait_rcu_gp(call_rcu_sched);
1819 } 1820 }
1820 EXPORT_SYMBOL_GPL(synchronize_sched); 1821 EXPORT_SYMBOL_GPL(synchronize_sched);
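
A minimal (hypothetical) update-side usage of synchronize_sched() looks like the sketch below: readers rely only on preemption being disabled (rcu_read_lock_sched()), while the updater unpublishes the old object, waits for all such readers, and only then frees it.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	int data;
};

static struct foo __rcu *global_foo;
static DEFINE_SPINLOCK(foo_lock);

static int read_foo(void)
{
	struct foo *p;
	int val = -1;

	rcu_read_lock_sched();			/* disables preemption */
	p = rcu_dereference_sched(global_foo);
	if (p)
		val = p->data;
	rcu_read_unlock_sched();
	return val;
}

static void update_foo(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	oldp = rcu_dereference_protected(global_foo,
					 lockdep_is_held(&foo_lock));
	rcu_assign_pointer(global_foo, newp);
	spin_unlock(&foo_lock);

	synchronize_sched();			/* wait out all sched readers */
	kfree(oldp);				/* now nobody can still see oldp */
}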
1821 1822
1822 /** 1823 /**
1823 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. 1824 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
1824 * 1825 *
1825 * Control will return to the caller some time after a full rcu_bh grace 1826 * Control will return to the caller some time after a full rcu_bh grace
1826 * period has elapsed, in other words after all currently executing rcu_bh 1827 * period has elapsed, in other words after all currently executing rcu_bh
1827 * read-side critical sections have completed. RCU read-side critical 1828 * read-side critical sections have completed. RCU read-side critical
1828 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), 1829 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
1829 * and may be nested. 1830 * and may be nested.
1830 */ 1831 */
1831 void synchronize_rcu_bh(void) 1832 void synchronize_rcu_bh(void)
1832 { 1833 {
1833 if (rcu_blocking_is_gp()) 1834 if (rcu_blocking_is_gp())
1834 return; 1835 return;
1835 wait_rcu_gp(call_rcu_bh); 1836 wait_rcu_gp(call_rcu_bh);
1836 } 1837 }
1837 EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1838 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1838 1839
1839 /* 1840 /*
1840 * Check to see if there is any immediate RCU-related work to be done 1841 * Check to see if there is any immediate RCU-related work to be done
1841 * by the current CPU, for the specified type of RCU, returning 1 if so. 1842 * by the current CPU, for the specified type of RCU, returning 1 if so.
1842 * The checks are in order of increasing expense: checks that can be 1843 * The checks are in order of increasing expense: checks that can be
1843 * carried out against CPU-local state are performed first. However, 1844 * carried out against CPU-local state are performed first. However,
1844 * we must check for CPU stalls first, else we might not get a chance. 1845 * we must check for CPU stalls first, else we might not get a chance.
1845 */ 1846 */
1846 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 1847 static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1847 { 1848 {
1848 struct rcu_node *rnp = rdp->mynode; 1849 struct rcu_node *rnp = rdp->mynode;
1849 1850
1850 rdp->n_rcu_pending++; 1851 rdp->n_rcu_pending++;
1851 1852
1852 /* Check for CPU stalls, if enabled. */ 1853 /* Check for CPU stalls, if enabled. */
1853 check_cpu_stall(rsp, rdp); 1854 check_cpu_stall(rsp, rdp);
1854 1855
1855 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1856 /* Is the RCU core waiting for a quiescent state from this CPU? */
1856 if (rcu_scheduler_fully_active && 1857 if (rcu_scheduler_fully_active &&
1857 rdp->qs_pending && !rdp->passed_quiesce) { 1858 rdp->qs_pending && !rdp->passed_quiesce) {
1858 1859
1859 /* 1860 /*
1860 * If force_quiescent_state() coming soon and this CPU 1861 * If force_quiescent_state() coming soon and this CPU
1861 * needs a quiescent state, and this is either RCU-sched 1862 * needs a quiescent state, and this is either RCU-sched
1862 * or RCU-bh, force a local reschedule. 1863 * or RCU-bh, force a local reschedule.
1863 */ 1864 */
1864 rdp->n_rp_qs_pending++; 1865 rdp->n_rp_qs_pending++;
1865 if (!rdp->preemptible && 1866 if (!rdp->preemptible &&
1866 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1867 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1867 jiffies)) 1868 jiffies))
1868 set_need_resched(); 1869 set_need_resched();
1869 } else if (rdp->qs_pending && rdp->passed_quiesce) { 1870 } else if (rdp->qs_pending && rdp->passed_quiesce) {
1870 rdp->n_rp_report_qs++; 1871 rdp->n_rp_report_qs++;
1871 return 1; 1872 return 1;
1872 } 1873 }
1873 1874
1874 /* Does this CPU have callbacks ready to invoke? */ 1875 /* Does this CPU have callbacks ready to invoke? */
1875 if (cpu_has_callbacks_ready_to_invoke(rdp)) { 1876 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
1876 rdp->n_rp_cb_ready++; 1877 rdp->n_rp_cb_ready++;
1877 return 1; 1878 return 1;
1878 } 1879 }
1879 1880
1880 /* Has RCU gone idle with this CPU needing another grace period? */ 1881 /* Has RCU gone idle with this CPU needing another grace period? */
1881 if (cpu_needs_another_gp(rsp, rdp)) { 1882 if (cpu_needs_another_gp(rsp, rdp)) {
1882 rdp->n_rp_cpu_needs_gp++; 1883 rdp->n_rp_cpu_needs_gp++;
1883 return 1; 1884 return 1;
1884 } 1885 }
1885 1886
1886 /* Has another RCU grace period completed? */ 1887 /* Has another RCU grace period completed? */
1887 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ 1888 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
1888 rdp->n_rp_gp_completed++; 1889 rdp->n_rp_gp_completed++;
1889 return 1; 1890 return 1;
1890 } 1891 }
1891 1892
1892 /* Has a new RCU grace period started? */ 1893 /* Has a new RCU grace period started? */
1893 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ 1894 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
1894 rdp->n_rp_gp_started++; 1895 rdp->n_rp_gp_started++;
1895 return 1; 1896 return 1;
1896 } 1897 }
1897 1898
1898 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1899 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1899 if (rcu_gp_in_progress(rsp) && 1900 if (rcu_gp_in_progress(rsp) &&
1900 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { 1901 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
1901 rdp->n_rp_need_fqs++; 1902 rdp->n_rp_need_fqs++;
1902 return 1; 1903 return 1;
1903 } 1904 }
1904 1905
1905 /* nothing to do */ 1906 /* nothing to do */
1906 rdp->n_rp_need_nothing++; 1907 rdp->n_rp_need_nothing++;
1907 return 0; 1908 return 0;
1908 } 1909 }
1909 1910
1910 /* 1911 /*
1911 * Check to see if there is any immediate RCU-related work to be done 1912 * Check to see if there is any immediate RCU-related work to be done
1912 * by the current CPU, returning 1 if so. This function is part of the 1913 * by the current CPU, returning 1 if so. This function is part of the
1913 * RCU implementation; it is -not- an exported member of the RCU API. 1914 * RCU implementation; it is -not- an exported member of the RCU API.
1914 */ 1915 */
1915 static int rcu_pending(int cpu) 1916 static int rcu_pending(int cpu)
1916 { 1917 {
1917 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || 1918 return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) ||
1918 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || 1919 __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) ||
1919 rcu_preempt_pending(cpu); 1920 rcu_preempt_pending(cpu);
1920 } 1921 }
1921 1922
1922 /* 1923 /*
1923 * Check to see if any future RCU-related work will need to be done 1924 * Check to see if any future RCU-related work will need to be done
1924 * by the current CPU, even if none need be done immediately, returning 1925 * by the current CPU, even if none need be done immediately, returning
1925 * 1 if so. 1926 * 1 if so.
1926 */ 1927 */
1927 static int rcu_cpu_has_callbacks(int cpu) 1928 static int rcu_cpu_has_callbacks(int cpu)
1928 { 1929 {
1929 /* RCU callbacks either ready or pending? */ 1930 /* RCU callbacks either ready or pending? */
1930 return per_cpu(rcu_sched_data, cpu).nxtlist || 1931 return per_cpu(rcu_sched_data, cpu).nxtlist ||
1931 per_cpu(rcu_bh_data, cpu).nxtlist || 1932 per_cpu(rcu_bh_data, cpu).nxtlist ||
1932 rcu_preempt_needs_cpu(cpu); 1933 rcu_preempt_needs_cpu(cpu);
1933 } 1934 }
1934 1935
1935 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 1936 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
1936 static atomic_t rcu_barrier_cpu_count; 1937 static atomic_t rcu_barrier_cpu_count;
1937 static DEFINE_MUTEX(rcu_barrier_mutex); 1938 static DEFINE_MUTEX(rcu_barrier_mutex);
1938 static struct completion rcu_barrier_completion; 1939 static struct completion rcu_barrier_completion;
1939 1940
1940 static void rcu_barrier_callback(struct rcu_head *notused) 1941 static void rcu_barrier_callback(struct rcu_head *notused)
1941 { 1942 {
1942 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1943 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
1943 complete(&rcu_barrier_completion); 1944 complete(&rcu_barrier_completion);
1944 } 1945 }
1945 1946
1946 /* 1947 /*
1947 * Called with preemption disabled, and from cross-cpu IRQ context. 1948 * Called with preemption disabled, and from cross-cpu IRQ context.
1948 */ 1949 */
1949 static void rcu_barrier_func(void *type) 1950 static void rcu_barrier_func(void *type)
1950 { 1951 {
1951 int cpu = smp_processor_id(); 1952 int cpu = smp_processor_id();
1952 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 1953 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
1953 void (*call_rcu_func)(struct rcu_head *head, 1954 void (*call_rcu_func)(struct rcu_head *head,
1954 void (*func)(struct rcu_head *head)); 1955 void (*func)(struct rcu_head *head));
1955 1956
1956 atomic_inc(&rcu_barrier_cpu_count); 1957 atomic_inc(&rcu_barrier_cpu_count);
1957 call_rcu_func = type; 1958 call_rcu_func = type;
1958 call_rcu_func(head, rcu_barrier_callback); 1959 call_rcu_func(head, rcu_barrier_callback);
1959 } 1960 }
1960 1961
1961 /* 1962 /*
1962 * Orchestrate the specified type of RCU barrier, waiting for all 1963 * Orchestrate the specified type of RCU barrier, waiting for all
1963 * RCU callbacks of the specified type to complete. 1964 * RCU callbacks of the specified type to complete.
1964 */ 1965 */
1965 static void _rcu_barrier(struct rcu_state *rsp, 1966 static void _rcu_barrier(struct rcu_state *rsp,
1966 void (*call_rcu_func)(struct rcu_head *head, 1967 void (*call_rcu_func)(struct rcu_head *head,
1967 void (*func)(struct rcu_head *head))) 1968 void (*func)(struct rcu_head *head)))
1968 { 1969 {
1969 BUG_ON(in_interrupt()); 1970 BUG_ON(in_interrupt());
1970 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 1971 /* Take mutex to serialize concurrent rcu_barrier() requests. */
1971 mutex_lock(&rcu_barrier_mutex); 1972 mutex_lock(&rcu_barrier_mutex);
1972 init_completion(&rcu_barrier_completion); 1973 init_completion(&rcu_barrier_completion);
1973 /* 1974 /*
1974 * Initialize rcu_barrier_cpu_count to 1, then invoke 1975 * Initialize rcu_barrier_cpu_count to 1, then invoke
1975 * rcu_barrier_func() on each CPU, so that each CPU also has 1976 * rcu_barrier_func() on each CPU, so that each CPU also has
1976 * incremented rcu_barrier_cpu_count. Only then is it safe to 1977 * incremented rcu_barrier_cpu_count. Only then is it safe to
1977 * decrement rcu_barrier_cpu_count -- otherwise the first CPU 1978 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
1978 * might complete its grace period before all of the other CPUs 1979 * might complete its grace period before all of the other CPUs
1979 * did their increment, causing this function to return too 1980 * did their increment, causing this function to return too
1980 * early. Note that on_each_cpu() disables irqs, which prevents 1981 * early. Note that on_each_cpu() disables irqs, which prevents
1981 * any CPUs from coming online or going offline until each online 1982 * any CPUs from coming online or going offline until each online
1982 * CPU has queued its RCU-barrier callback. 1983 * CPU has queued its RCU-barrier callback.
1983 */ 1984 */
1984 atomic_set(&rcu_barrier_cpu_count, 1); 1985 atomic_set(&rcu_barrier_cpu_count, 1);
1985 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); 1986 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
1986 if (atomic_dec_and_test(&rcu_barrier_cpu_count)) 1987 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
1987 complete(&rcu_barrier_completion); 1988 complete(&rcu_barrier_completion);
1988 wait_for_completion(&rcu_barrier_completion); 1989 wait_for_completion(&rcu_barrier_completion);
1989 mutex_unlock(&rcu_barrier_mutex); 1990 mutex_unlock(&rcu_barrier_mutex);
1990 } 1991 }
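The comment in _rcu_barrier() above describes a reference-count-style handshake: the orchestrating task holds one extra count until every CPU has enqueued its barrier callback, and only then drops it, so the completion cannot fire early. A minimal user-space sketch of that counting pattern, using C11 atomics and pthreads with illustrative names (not the kernel implementation):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NWORKERS 4

    static atomic_int barrier_count;        /* plays the role of rcu_barrier_cpu_count */
    static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;
    static int done;                        /* plays the role of rcu_barrier_completion */

    /* Drop one reference; the last one to do so signals completion. */
    static void barrier_callback(void)
    {
            if (atomic_fetch_sub(&barrier_count, 1) == 1) {
                    pthread_mutex_lock(&done_lock);
                    done = 1;
                    pthread_cond_signal(&done_cv);
                    pthread_mutex_unlock(&done_lock);
            }
    }

    /* Stands in for the callback that eventually runs on each CPU. */
    static void *worker(void *arg)
    {
            (void)arg;
            barrier_callback();
            return NULL;
    }

    int main(void)
    {
            pthread_t tid[NWORKERS];
            int i;

            /* Hold an initial reference so the count cannot reach zero early. */
            atomic_store(&barrier_count, 1);
            for (i = 0; i < NWORKERS; i++) {
                    /* Synchronous part: take a reference, then queue the work,
                     * mirroring on_each_cpu(rcu_barrier_func, ...). */
                    atomic_fetch_add(&barrier_count, 1);
                    pthread_create(&tid[i], NULL, worker, NULL);
            }
            /* Only now is it safe to drop the initial reference. */
            barrier_callback();
            pthread_mutex_lock(&done_lock);
            while (!done)
                    pthread_cond_wait(&done_cv, &done_lock);
            pthread_mutex_unlock(&done_lock);
            for (i = 0; i < NWORKERS; i++)
                    pthread_join(tid[i], NULL);
            printf("all %d callbacks completed\n", NWORKERS);
            return 0;
    }

The key property, as in the kernel code, is that every increment happens before the initial reference is dropped, so the count can only reach zero after the last callback has run.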
1991 1992
1992 /** 1993 /**
1993 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. 1994 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
1994 */ 1995 */
1995 void rcu_barrier_bh(void) 1996 void rcu_barrier_bh(void)
1996 { 1997 {
1997 _rcu_barrier(&rcu_bh_state, call_rcu_bh); 1998 _rcu_barrier(&rcu_bh_state, call_rcu_bh);
1998 } 1999 }
1999 EXPORT_SYMBOL_GPL(rcu_barrier_bh); 2000 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
2000 2001
2001 /** 2002 /**
2002 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. 2003 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
2003 */ 2004 */
2004 void rcu_barrier_sched(void) 2005 void rcu_barrier_sched(void)
2005 { 2006 {
2006 _rcu_barrier(&rcu_sched_state, call_rcu_sched); 2007 _rcu_barrier(&rcu_sched_state, call_rcu_sched);
2007 } 2008 }
2008 EXPORT_SYMBOL_GPL(rcu_barrier_sched); 2009 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
2009 2010
2010 /* 2011 /*
2011 * Do boot-time initialization of a CPU's per-CPU RCU data. 2012 * Do boot-time initialization of a CPU's per-CPU RCU data.
2012 */ 2013 */
2013 static void __init 2014 static void __init
2014 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 2015 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2015 { 2016 {
2016 unsigned long flags; 2017 unsigned long flags;
2017 int i; 2018 int i;
2018 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2019 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2019 struct rcu_node *rnp = rcu_get_root(rsp); 2020 struct rcu_node *rnp = rcu_get_root(rsp);
2020 2021
2021 /* Set up local state, ensuring consistent view of global state. */ 2022 /* Set up local state, ensuring consistent view of global state. */
2022 raw_spin_lock_irqsave(&rnp->lock, flags); 2023 raw_spin_lock_irqsave(&rnp->lock, flags);
2023 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 2024 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
2024 rdp->nxtlist = NULL; 2025 rdp->nxtlist = NULL;
2025 for (i = 0; i < RCU_NEXT_SIZE; i++) 2026 for (i = 0; i < RCU_NEXT_SIZE; i++)
2026 rdp->nxttail[i] = &rdp->nxtlist; 2027 rdp->nxttail[i] = &rdp->nxtlist;
2027 rdp->qlen = 0; 2028 rdp->qlen = 0;
2028 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2029 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2029 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); 2030 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
2030 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2031 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2031 rdp->cpu = cpu; 2032 rdp->cpu = cpu;
2032 rdp->rsp = rsp; 2033 rdp->rsp = rsp;
2033 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2034 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2034 } 2035 }
2035 2036
2036 /* 2037 /*
2037 * Initialize a CPU's per-CPU RCU data. Note that only one online or 2038 * Initialize a CPU's per-CPU RCU data. Note that only one online or
2038 * offline event can be happening at a given time. Note also that we 2039 * offline event can be happening at a given time. Note also that we
2039 * can accept some slop in the rsp->completed access due to the fact 2040 * can accept some slop in the rsp->completed access due to the fact
2040 * that this CPU cannot possibly have any RCU callbacks in flight yet. 2041 * that this CPU cannot possibly have any RCU callbacks in flight yet.
2041 */ 2042 */
2042 static void __cpuinit 2043 static void __cpuinit
2043 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) 2044 rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2044 { 2045 {
2045 unsigned long flags; 2046 unsigned long flags;
2046 unsigned long mask; 2047 unsigned long mask;
2047 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2048 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2048 struct rcu_node *rnp = rcu_get_root(rsp); 2049 struct rcu_node *rnp = rcu_get_root(rsp);
2049 2050
2050 /* Set up local state, ensuring consistent view of global state. */ 2051 /* Set up local state, ensuring consistent view of global state. */
2051 raw_spin_lock_irqsave(&rnp->lock, flags); 2052 raw_spin_lock_irqsave(&rnp->lock, flags);
2052 rdp->beenonline = 1; /* We have now been online. */ 2053 rdp->beenonline = 1; /* We have now been online. */
2053 rdp->preemptible = preemptible; 2054 rdp->preemptible = preemptible;
2054 rdp->qlen_last_fqs_check = 0; 2055 rdp->qlen_last_fqs_check = 0;
2055 rdp->n_force_qs_snap = rsp->n_force_qs; 2056 rdp->n_force_qs_snap = rsp->n_force_qs;
2056 rdp->blimit = blimit; 2057 rdp->blimit = blimit;
2057 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; 2058 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
2058 atomic_set(&rdp->dynticks->dynticks, 2059 atomic_set(&rdp->dynticks->dynticks,
2059 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 2060 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2061 rcu_prepare_for_idle_init(cpu);
2060 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2062 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2061 2063
2062 /* 2064 /*
2063 * A new grace period might start here. If so, we won't be part 2065 * A new grace period might start here. If so, we won't be part
2064 * of it, but that is OK, as we are currently in a quiescent state. 2066 * of it, but that is OK, as we are currently in a quiescent state.
2065 */ 2067 */
2066 2068
2067 /* Exclude any attempts to start a new GP on large systems. */ 2069 /* Exclude any attempts to start a new GP on large systems. */
2068 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 2070 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
2069 2071
2070 /* Add CPU to rcu_node bitmasks. */ 2072 /* Add CPU to rcu_node bitmasks. */
2071 rnp = rdp->mynode; 2073 rnp = rdp->mynode;
2072 mask = rdp->grpmask; 2074 mask = rdp->grpmask;
2073 do { 2075 do {
2074 /* Exclude any attempts to start a new GP on small systems. */ 2076 /* Exclude any attempts to start a new GP on small systems. */
2075 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 2077 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
2076 rnp->qsmaskinit |= mask; 2078 rnp->qsmaskinit |= mask;
2077 mask = rnp->grpmask; 2079 mask = rnp->grpmask;
2078 if (rnp == rdp->mynode) { 2080 if (rnp == rdp->mynode) {
2079 /* 2081 /*
2080 * If there is a grace period in progress, we will 2082 * If there is a grace period in progress, we will
2081 * set up to wait for it next time we run the 2083 * set up to wait for it next time we run the
2082 * RCU core code. 2084 * RCU core code.
2083 */ 2085 */
2084 rdp->gpnum = rnp->completed; 2086 rdp->gpnum = rnp->completed;
2085 rdp->completed = rnp->completed; 2087 rdp->completed = rnp->completed;
2086 rdp->passed_quiesce = 0; 2088 rdp->passed_quiesce = 0;
2087 rdp->qs_pending = 0; 2089 rdp->qs_pending = 0;
2088 rdp->passed_quiesce_gpnum = rnp->gpnum - 1; 2090 rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
2089 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); 2091 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
2090 } 2092 }
2091 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 2093 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
2092 rnp = rnp->parent; 2094 rnp = rnp->parent;
2093 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 2095 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
2094 2096
2095 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2097 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
2096 } 2098 }
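The atomic_set() in rcu_init_percpu_data() above forces the incoming CPU's ->dynticks counter to an odd value: an even value marks the CPU as dyntick-idle, and a CPU being brought online is by definition not idle. The expression simply clears the low bit and adds one, landing on the next odd number whatever the starting value. A tiny stand-alone illustration of just that arithmetic (plain C, nothing kernel-specific assumed):

    #include <assert.h>
    #include <stdio.h>

    /* Force a dynticks-style counter to the next odd (non-idle) value,
     * mirroring (atomic_read(&...->dynticks) & ~0x1) + 1 above. */
    static int force_nonidle(int dynticks)
    {
            return (dynticks & ~0x1) + 1;
    }

    int main(void)
    {
            int v;

            for (v = 0; v < 8; v++) {
                    int forced = force_nonidle(v);

                    /* Result is always odd and never moves backwards. */
                    assert(forced & 0x1);
                    assert(forced >= v);
                    printf("%d -> %d\n", v, forced);
            }
            return 0;
    }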
2097 2099
2098 static void __cpuinit rcu_prepare_cpu(int cpu) 2100 static void __cpuinit rcu_prepare_cpu(int cpu)
2099 { 2101 {
2100 rcu_init_percpu_data(cpu, &rcu_sched_state, 0); 2102 rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
2101 rcu_init_percpu_data(cpu, &rcu_bh_state, 0); 2103 rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
2102 rcu_preempt_init_percpu_data(cpu); 2104 rcu_preempt_init_percpu_data(cpu);
2103 } 2105 }
2104 2106
2105 /* 2107 /*
2106 * Handle CPU online/offline notification events. 2108 * Handle CPU online/offline notification events.
2107 */ 2109 */
2108 static int __cpuinit rcu_cpu_notify(struct notifier_block *self, 2110 static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2109 unsigned long action, void *hcpu) 2111 unsigned long action, void *hcpu)
2110 { 2112 {
2111 long cpu = (long)hcpu; 2113 long cpu = (long)hcpu;
2112 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 2114 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
2113 struct rcu_node *rnp = rdp->mynode; 2115 struct rcu_node *rnp = rdp->mynode;
2114 2116
2115 trace_rcu_utilization("Start CPU hotplug"); 2117 trace_rcu_utilization("Start CPU hotplug");
2116 switch (action) { 2118 switch (action) {
2117 case CPU_UP_PREPARE: 2119 case CPU_UP_PREPARE:
2118 case CPU_UP_PREPARE_FROZEN: 2120 case CPU_UP_PREPARE_FROZEN:
2119 rcu_prepare_cpu(cpu); 2121 rcu_prepare_cpu(cpu);
2120 rcu_prepare_kthreads(cpu); 2122 rcu_prepare_kthreads(cpu);
2121 break; 2123 break;
2122 case CPU_ONLINE: 2124 case CPU_ONLINE:
2123 case CPU_DOWN_FAILED: 2125 case CPU_DOWN_FAILED:
2124 rcu_node_kthread_setaffinity(rnp, -1); 2126 rcu_node_kthread_setaffinity(rnp, -1);
2125 rcu_cpu_kthread_setrt(cpu, 1); 2127 rcu_cpu_kthread_setrt(cpu, 1);
2126 break; 2128 break;
2127 case CPU_DOWN_PREPARE: 2129 case CPU_DOWN_PREPARE:
2128 rcu_node_kthread_setaffinity(rnp, cpu); 2130 rcu_node_kthread_setaffinity(rnp, cpu);
2129 rcu_cpu_kthread_setrt(cpu, 0); 2131 rcu_cpu_kthread_setrt(cpu, 0);
2130 break; 2132 break;
2131 case CPU_DYING: 2133 case CPU_DYING:
2132 case CPU_DYING_FROZEN: 2134 case CPU_DYING_FROZEN:
2133 /* 2135 /*
2134 * The whole machine is "stopped" except this CPU, so we can 2136 * The whole machine is "stopped" except this CPU, so we can
2135 * touch any data without introducing corruption. We send the 2137 * touch any data without introducing corruption. We send the
2136 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2138 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2137 */ 2139 */
2138 rcu_send_cbs_to_online(&rcu_bh_state); 2140 rcu_send_cbs_to_online(&rcu_bh_state);
2139 rcu_send_cbs_to_online(&rcu_sched_state); 2141 rcu_send_cbs_to_online(&rcu_sched_state);
2140 rcu_preempt_send_cbs_to_online(); 2142 rcu_preempt_send_cbs_to_online();
2143 rcu_cleanup_after_idle(cpu);
2141 break; 2144 break;
2142 case CPU_DEAD: 2145 case CPU_DEAD:
2143 case CPU_DEAD_FROZEN: 2146 case CPU_DEAD_FROZEN:
2144 case CPU_UP_CANCELED: 2147 case CPU_UP_CANCELED:
2145 case CPU_UP_CANCELED_FROZEN: 2148 case CPU_UP_CANCELED_FROZEN:
2146 rcu_offline_cpu(cpu); 2149 rcu_offline_cpu(cpu);
2147 break; 2150 break;
2148 default: 2151 default:
2149 break; 2152 break;
2150 } 2153 }
2151 trace_rcu_utilization("End CPU hotplug"); 2154 trace_rcu_utilization("End CPU hotplug");
2152 return NOTIFY_OK; 2155 return NOTIFY_OK;
2153 } 2156 }
2154 2157
2155 /* 2158 /*
2156 * This function is invoked towards the end of the scheduler's initialization 2159 * This function is invoked towards the end of the scheduler's initialization
2157 * process. Before this is called, the idle task might contain 2160 * process. Before this is called, the idle task might contain
2158 * RCU read-side critical sections (during which time, this idle 2161 * RCU read-side critical sections (during which time, this idle
2159 * task is booting the system). After this function is called, the 2162 * task is booting the system). After this function is called, the
2160 * idle tasks are prohibited from containing RCU read-side critical 2163 * idle tasks are prohibited from containing RCU read-side critical
2161 * sections. This function also enables RCU lockdep checking. 2164 * sections. This function also enables RCU lockdep checking.
2162 */ 2165 */
2163 void rcu_scheduler_starting(void) 2166 void rcu_scheduler_starting(void)
2164 { 2167 {
2165 WARN_ON(num_online_cpus() != 1); 2168 WARN_ON(num_online_cpus() != 1);
2166 WARN_ON(nr_context_switches() > 0); 2169 WARN_ON(nr_context_switches() > 0);
2167 rcu_scheduler_active = 1; 2170 rcu_scheduler_active = 1;
2168 } 2171 }
2169 2172
2170 /* 2173 /*
2171 * Compute the per-level fanout, either using the exact fanout specified 2174 * Compute the per-level fanout, either using the exact fanout specified
2172 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. 2175 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
2173 */ 2176 */
2174 #ifdef CONFIG_RCU_FANOUT_EXACT 2177 #ifdef CONFIG_RCU_FANOUT_EXACT
2175 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2178 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2176 { 2179 {
2177 int i; 2180 int i;
2178 2181
2179 for (i = NUM_RCU_LVLS - 1; i > 0; i--) 2182 for (i = NUM_RCU_LVLS - 1; i > 0; i--)
2180 rsp->levelspread[i] = CONFIG_RCU_FANOUT; 2183 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
2181 rsp->levelspread[0] = RCU_FANOUT_LEAF; 2184 rsp->levelspread[0] = RCU_FANOUT_LEAF;
2182 } 2185 }
2183 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ 2186 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
2184 static void __init rcu_init_levelspread(struct rcu_state *rsp) 2187 static void __init rcu_init_levelspread(struct rcu_state *rsp)
2185 { 2188 {
2186 int ccur; 2189 int ccur;
2187 int cprv; 2190 int cprv;
2188 int i; 2191 int i;
2189 2192
2190 cprv = NR_CPUS; 2193 cprv = NR_CPUS;
2191 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2194 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
2192 ccur = rsp->levelcnt[i]; 2195 ccur = rsp->levelcnt[i];
2193 rsp->levelspread[i] = (cprv + ccur - 1) / ccur; 2196 rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
2194 cprv = ccur; 2197 cprv = ccur;
2195 } 2198 }
2196 } 2199 }
2197 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ 2200 #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
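The non-CONFIG_RCU_FANOUT_EXACT branch above balances the tree by repeated ceiling division: each level's spread is the number of "children" at the level below divided by the number of nodes at this level, rounded up. A stand-alone sketch of the same loop for an assumed two-level, 64-CPU tree (the levelcnt[] values are illustrative, not taken from any particular configuration):

    #include <stdio.h>

    #define NUM_RCU_LVLS 2
    #define NR_CPUS 64

    int main(void)
    {
            /* levelcnt[] for this assumed geometry: one root, four leaves. */
            int levelcnt[NUM_RCU_LVLS] = { 1, 4 };
            int levelspread[NUM_RCU_LVLS];
            int cprv = NR_CPUS;
            int i;

            for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
                    int ccur = levelcnt[i];

                    levelspread[i] = (cprv + ccur - 1) / ccur;  /* ceiling division */
                    cprv = ccur;
            }
            /* Prints levelspread[1] = 16 (CPUs per leaf) and
             * levelspread[0] = 4 (leaves under the root). */
            for (i = 0; i < NUM_RCU_LVLS; i++)
                    printf("levelspread[%d] = %d\n", i, levelspread[i]);
            return 0;
    }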
2198 2201
2199 /* 2202 /*
2200 * Helper function for rcu_init() that initializes one rcu_state structure. 2203 * Helper function for rcu_init() that initializes one rcu_state structure.
2201 */ 2204 */
2202 static void __init rcu_init_one(struct rcu_state *rsp, 2205 static void __init rcu_init_one(struct rcu_state *rsp,
2203 struct rcu_data __percpu *rda) 2206 struct rcu_data __percpu *rda)
2204 { 2207 {
2205 static char *buf[] = { "rcu_node_level_0", 2208 static char *buf[] = { "rcu_node_level_0",
2206 "rcu_node_level_1", 2209 "rcu_node_level_1",
2207 "rcu_node_level_2", 2210 "rcu_node_level_2",
2208 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ 2211 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
2209 int cpustride = 1; 2212 int cpustride = 1;
2210 int i; 2213 int i;
2211 int j; 2214 int j;
2212 struct rcu_node *rnp; 2215 struct rcu_node *rnp;
2213 2216
2214 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ 2217 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
2215 2218
2216 /* Initialize the level-tracking arrays. */ 2219 /* Initialize the level-tracking arrays. */
2217 2220
2218 for (i = 1; i < NUM_RCU_LVLS; i++) 2221 for (i = 1; i < NUM_RCU_LVLS; i++)
2219 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; 2222 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
2220 rcu_init_levelspread(rsp); 2223 rcu_init_levelspread(rsp);
2221 2224
2222 /* Initialize the elements themselves, starting from the leaves. */ 2225 /* Initialize the elements themselves, starting from the leaves. */
2223 2226
2224 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { 2227 for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
2225 cpustride *= rsp->levelspread[i]; 2228 cpustride *= rsp->levelspread[i];
2226 rnp = rsp->level[i]; 2229 rnp = rsp->level[i];
2227 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 2230 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
2228 raw_spin_lock_init(&rnp->lock); 2231 raw_spin_lock_init(&rnp->lock);
2229 lockdep_set_class_and_name(&rnp->lock, 2232 lockdep_set_class_and_name(&rnp->lock,
2230 &rcu_node_class[i], buf[i]); 2233 &rcu_node_class[i], buf[i]);
2231 rnp->gpnum = 0; 2234 rnp->gpnum = 0;
2232 rnp->qsmask = 0; 2235 rnp->qsmask = 0;
2233 rnp->qsmaskinit = 0; 2236 rnp->qsmaskinit = 0;
2234 rnp->grplo = j * cpustride; 2237 rnp->grplo = j * cpustride;
2235 rnp->grphi = (j + 1) * cpustride - 1; 2238 rnp->grphi = (j + 1) * cpustride - 1;
2236 if (rnp->grphi >= NR_CPUS) 2239 if (rnp->grphi >= NR_CPUS)
2237 rnp->grphi = NR_CPUS - 1; 2240 rnp->grphi = NR_CPUS - 1;
2238 if (i == 0) { 2241 if (i == 0) {
2239 rnp->grpnum = 0; 2242 rnp->grpnum = 0;
2240 rnp->grpmask = 0; 2243 rnp->grpmask = 0;
2241 rnp->parent = NULL; 2244 rnp->parent = NULL;
2242 } else { 2245 } else {
2243 rnp->grpnum = j % rsp->levelspread[i - 1]; 2246 rnp->grpnum = j % rsp->levelspread[i - 1];
2244 rnp->grpmask = 1UL << rnp->grpnum; 2247 rnp->grpmask = 1UL << rnp->grpnum;
2245 rnp->parent = rsp->level[i - 1] + 2248 rnp->parent = rsp->level[i - 1] +
2246 j / rsp->levelspread[i - 1]; 2249 j / rsp->levelspread[i - 1];
2247 } 2250 }
2248 rnp->level = i; 2251 rnp->level = i;
2249 INIT_LIST_HEAD(&rnp->blkd_tasks); 2252 INIT_LIST_HEAD(&rnp->blkd_tasks);
2250 } 2253 }
2251 } 2254 }
2252 2255
2253 rsp->rda = rda; 2256 rsp->rda = rda;
2254 rnp = rsp->level[NUM_RCU_LVLS - 1]; 2257 rnp = rsp->level[NUM_RCU_LVLS - 1];
2255 for_each_possible_cpu(i) { 2258 for_each_possible_cpu(i) {
2256 while (i > rnp->grphi) 2259 while (i > rnp->grphi)
2257 rnp++; 2260 rnp++;
2258 per_cpu_ptr(rsp->rda, i)->mynode = rnp; 2261 per_cpu_ptr(rsp->rda, i)->mynode = rnp;
2259 rcu_boot_init_percpu_data(i, rsp); 2262 rcu_boot_init_percpu_data(i, rsp);
2260 } 2263 }
2261 } 2264 }
2262 2265
2263 void __init rcu_init(void) 2266 void __init rcu_init(void)
2264 { 2267 {
2265 int cpu; 2268 int cpu;
2266 2269
2267 rcu_bootup_announce(); 2270 rcu_bootup_announce();
2268 rcu_init_one(&rcu_sched_state, &rcu_sched_data); 2271 rcu_init_one(&rcu_sched_state, &rcu_sched_data);
2269 rcu_init_one(&rcu_bh_state, &rcu_bh_data); 2272 rcu_init_one(&rcu_bh_state, &rcu_bh_data);
2270 __rcu_init_preempt(); 2273 __rcu_init_preempt();
2271 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 2274 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
2272 2275
2273 /* 2276 /*
2274 * We don't need protection against CPU-hotplug here because 2277 * We don't need protection against CPU-hotplug here because
2275 * this is called early in boot, before either interrupts 2278 * this is called early in boot, before either interrupts
2276 * or the scheduler are operational. 2279 * or the scheduler are operational.
2277 */ 2280 */
2278 cpu_notifier(rcu_cpu_notify, 0); 2281 cpu_notifier(rcu_cpu_notify, 0);
2279 for_each_online_cpu(cpu) 2282 for_each_online_cpu(cpu)
2280 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); 2283 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
2281 check_cpu_stall_init(); 2284 check_cpu_stall_init();
2282 } 2285 }
2283 2286
2284 #include "rcutree_plugin.h" 2287 #include "rcutree_plugin.h"
2285 2288
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions. 3 * Internal non-public definitions.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details. 13 * GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * 18 *
19 * Copyright IBM Corporation, 2008 19 * Copyright IBM Corporation, 2008
20 * 20 *
21 * Author: Ingo Molnar <mingo@elte.hu> 21 * Author: Ingo Molnar <mingo@elte.hu>
22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25 #include <linux/cache.h> 25 #include <linux/cache.h>
26 #include <linux/spinlock.h> 26 #include <linux/spinlock.h>
27 #include <linux/threads.h> 27 #include <linux/threads.h>
28 #include <linux/cpumask.h> 28 #include <linux/cpumask.h>
29 #include <linux/seqlock.h> 29 #include <linux/seqlock.h>
30 30
31 /* 31 /*
32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. 32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
33 * In theory, it should be possible to add more levels straightforwardly. 33 * In theory, it should be possible to add more levels straightforwardly.
34 * In practice, this did work well going from three levels to four. 34 * In practice, this did work well going from three levels to four.
35 * Of course, your mileage may vary. 35 * Of course, your mileage may vary.
36 */ 36 */
37 #define MAX_RCU_LVLS 4 37 #define MAX_RCU_LVLS 4
38 #if CONFIG_RCU_FANOUT > 16 38 #if CONFIG_RCU_FANOUT > 16
39 #define RCU_FANOUT_LEAF 16 39 #define RCU_FANOUT_LEAF 16
40 #else /* #if CONFIG_RCU_FANOUT > 16 */ 40 #else /* #if CONFIG_RCU_FANOUT > 16 */
41 #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) 41 #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
42 #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ 42 #endif /* #else #if CONFIG_RCU_FANOUT > 16 */
43 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) 43 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
44 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) 44 #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
45 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) 45 #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
46 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) 46 #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
47 47
48 #if NR_CPUS <= RCU_FANOUT_1 48 #if NR_CPUS <= RCU_FANOUT_1
49 # define NUM_RCU_LVLS 1 49 # define NUM_RCU_LVLS 1
50 # define NUM_RCU_LVL_0 1 50 # define NUM_RCU_LVL_0 1
51 # define NUM_RCU_LVL_1 (NR_CPUS) 51 # define NUM_RCU_LVL_1 (NR_CPUS)
52 # define NUM_RCU_LVL_2 0 52 # define NUM_RCU_LVL_2 0
53 # define NUM_RCU_LVL_3 0 53 # define NUM_RCU_LVL_3 0
54 # define NUM_RCU_LVL_4 0 54 # define NUM_RCU_LVL_4 0
55 #elif NR_CPUS <= RCU_FANOUT_2 55 #elif NR_CPUS <= RCU_FANOUT_2
56 # define NUM_RCU_LVLS 2 56 # define NUM_RCU_LVLS 2
57 # define NUM_RCU_LVL_0 1 57 # define NUM_RCU_LVL_0 1
58 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 58 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
59 # define NUM_RCU_LVL_2 (NR_CPUS) 59 # define NUM_RCU_LVL_2 (NR_CPUS)
60 # define NUM_RCU_LVL_3 0 60 # define NUM_RCU_LVL_3 0
61 # define NUM_RCU_LVL_4 0 61 # define NUM_RCU_LVL_4 0
62 #elif NR_CPUS <= RCU_FANOUT_3 62 #elif NR_CPUS <= RCU_FANOUT_3
63 # define NUM_RCU_LVLS 3 63 # define NUM_RCU_LVLS 3
64 # define NUM_RCU_LVL_0 1 64 # define NUM_RCU_LVL_0 1
65 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 65 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
66 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 66 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
67 # define NUM_RCU_LVL_3 (NR_CPUS) 67 # define NUM_RCU_LVL_3 (NR_CPUS)
68 # define NUM_RCU_LVL_4 0 68 # define NUM_RCU_LVL_4 0
69 #elif NR_CPUS <= RCU_FANOUT_4 69 #elif NR_CPUS <= RCU_FANOUT_4
70 # define NUM_RCU_LVLS 4 70 # define NUM_RCU_LVLS 4
71 # define NUM_RCU_LVL_0 1 71 # define NUM_RCU_LVL_0 1
72 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) 72 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
73 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) 73 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
74 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) 74 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
75 # define NUM_RCU_LVL_4 (NR_CPUS) 75 # define NUM_RCU_LVL_4 (NR_CPUS)
76 #else 76 #else
77 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" 77 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
78 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ 78 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
79 79
80 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) 80 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
81 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) 81 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
82 82
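As a concrete illustration of the geometry macros above (values are illustrative, not from any particular .config): with NR_CPUS = 64 and CONFIG_RCU_FANOUT = 16, RCU_FANOUT_LEAF = 16, RCU_FANOUT_1 = 16 and RCU_FANOUT_2 = 256, so the NR_CPUS <= RCU_FANOUT_2 branch is selected: NUM_RCU_LVLS = 2, NUM_RCU_LVL_0 = 1, NUM_RCU_LVL_1 = DIV_ROUND_UP(64, 16) = 4, and NUM_RCU_LVL_2 = 64. RCU_SUM is then 1 + 4 + 64 = 69, and NUM_RCU_NODES = 69 - 64 = 5, that is, one root rcu_node with four leaf rcu_node structures beneath it.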
83 /* 83 /*
84 * Dynticks per-CPU state. 84 * Dynticks per-CPU state.
85 */ 85 */
86 struct rcu_dynticks { 86 struct rcu_dynticks {
87 long long dynticks_nesting; /* Track irq/process nesting level. */ 87 long long dynticks_nesting; /* Track irq/process nesting level. */
88 /* Process level is worth LLONG_MAX/2. */ 88 /* Process level is worth LLONG_MAX/2. */
89 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 89 int dynticks_nmi_nesting; /* Track NMI nesting level. */
90 atomic_t dynticks; /* Even value for idle, else odd. */ 90 atomic_t dynticks; /* Even value for idle, else odd. */
91 }; 91 };
92 92
93 /* RCU's kthread states for tracing. */ 93 /* RCU's kthread states for tracing. */
94 #define RCU_KTHREAD_STOPPED 0 94 #define RCU_KTHREAD_STOPPED 0
95 #define RCU_KTHREAD_RUNNING 1 95 #define RCU_KTHREAD_RUNNING 1
96 #define RCU_KTHREAD_WAITING 2 96 #define RCU_KTHREAD_WAITING 2
97 #define RCU_KTHREAD_OFFCPU 3 97 #define RCU_KTHREAD_OFFCPU 3
98 #define RCU_KTHREAD_YIELDING 4 98 #define RCU_KTHREAD_YIELDING 4
99 #define RCU_KTHREAD_MAX 4 99 #define RCU_KTHREAD_MAX 4
100 100
101 /* 101 /*
102 * Definition for node within the RCU grace-period-detection hierarchy. 102 * Definition for node within the RCU grace-period-detection hierarchy.
103 */ 103 */
104 struct rcu_node { 104 struct rcu_node {
105 raw_spinlock_t lock; /* Root rcu_node's lock protects some */ 105 raw_spinlock_t lock; /* Root rcu_node's lock protects some */
106 /* rcu_state fields as well as following. */ 106 /* rcu_state fields as well as following. */
107 unsigned long gpnum; /* Current grace period for this node. */ 107 unsigned long gpnum; /* Current grace period for this node. */
108 /* This will either be equal to or one */ 108 /* This will either be equal to or one */
109 /* behind the root rcu_node's gpnum. */ 109 /* behind the root rcu_node's gpnum. */
110 unsigned long completed; /* Last GP completed for this node. */ 110 unsigned long completed; /* Last GP completed for this node. */
111 /* This will either be equal to or one */ 111 /* This will either be equal to or one */
112 /* behind the root rcu_node's gpnum. */ 112 /* behind the root rcu_node's gpnum. */
113 unsigned long qsmask; /* CPUs or groups that need to switch in */ 113 unsigned long qsmask; /* CPUs or groups that need to switch in */
114 /* order for current grace period to proceed.*/ 114 /* order for current grace period to proceed.*/
115 /* In leaf rcu_node, each bit corresponds to */ 115 /* In leaf rcu_node, each bit corresponds to */
116 /* an rcu_data structure, otherwise, each */ 116 /* an rcu_data structure, otherwise, each */
117 /* bit corresponds to a child rcu_node */ 117 /* bit corresponds to a child rcu_node */
118 /* structure. */ 118 /* structure. */
119 unsigned long expmask; /* Groups that have ->blkd_tasks */ 119 unsigned long expmask; /* Groups that have ->blkd_tasks */
120 /* elements that need to drain to allow the */ 120 /* elements that need to drain to allow the */
121 /* current expedited grace period to */ 121 /* current expedited grace period to */
122 /* complete (only for TREE_PREEMPT_RCU). */ 122 /* complete (only for TREE_PREEMPT_RCU). */
123 atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ 123 atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
124 /* Since this has meaning only for leaf */ 124 /* Since this has meaning only for leaf */
125 /* rcu_node structures, 32 bits suffices. */ 125 /* rcu_node structures, 32 bits suffices. */
126 unsigned long qsmaskinit; 126 unsigned long qsmaskinit;
127 /* Per-GP initial value for qsmask & expmask. */ 127 /* Per-GP initial value for qsmask & expmask. */
128 unsigned long grpmask; /* Mask to apply to parent qsmask. */ 128 unsigned long grpmask; /* Mask to apply to parent qsmask. */
129 /* Only one bit will be set in this mask. */ 129 /* Only one bit will be set in this mask. */
130 int grplo; /* lowest-numbered CPU or group here. */ 130 int grplo; /* lowest-numbered CPU or group here. */
131 int grphi; /* highest-numbered CPU or group here. */ 131 int grphi; /* highest-numbered CPU or group here. */
132 u8 grpnum; /* CPU/group number for next level up. */ 132 u8 grpnum; /* CPU/group number for next level up. */
133 u8 level; /* root is at level 0. */ 133 u8 level; /* root is at level 0. */
134 struct rcu_node *parent; 134 struct rcu_node *parent;
135 struct list_head blkd_tasks; 135 struct list_head blkd_tasks;
136 /* Tasks blocked in RCU read-side critical */ 136 /* Tasks blocked in RCU read-side critical */
137 /* section. Tasks are placed at the head */ 137 /* section. Tasks are placed at the head */
138 /* of this list and age towards the tail. */ 138 /* of this list and age towards the tail. */
139 struct list_head *gp_tasks; 139 struct list_head *gp_tasks;
140 /* Pointer to the first task blocking the */ 140 /* Pointer to the first task blocking the */
141 /* current grace period, or NULL if there */ 141 /* current grace period, or NULL if there */
142 /* is no such task. */ 142 /* is no such task. */
143 struct list_head *exp_tasks; 143 struct list_head *exp_tasks;
144 /* Pointer to the first task blocking the */ 144 /* Pointer to the first task blocking the */
145 /* current expedited grace period, or NULL */ 145 /* current expedited grace period, or NULL */
146 /* if there is no such task. If there */ 146 /* if there is no such task. If there */
147 /* is no current expedited grace period, */ 147 /* is no current expedited grace period, */
148 /* then there cannot be any such task. */ 148 /* then there cannot be any such task. */
149 #ifdef CONFIG_RCU_BOOST 149 #ifdef CONFIG_RCU_BOOST
150 struct list_head *boost_tasks; 150 struct list_head *boost_tasks;
151 /* Pointer to first task that needs to be */ 151 /* Pointer to first task that needs to be */
152 /* priority boosted, or NULL if no priority */ 152 /* priority boosted, or NULL if no priority */
153 /* boosting is needed for this rcu_node */ 153 /* boosting is needed for this rcu_node */
154 /* structure. If there are no tasks */ 154 /* structure. If there are no tasks */
155 /* queued on this rcu_node structure that */ 155 /* queued on this rcu_node structure that */
156 /* are blocking the current grace period, */ 156 /* are blocking the current grace period, */
157 /* there can be no such task. */ 157 /* there can be no such task. */
158 unsigned long boost_time; 158 unsigned long boost_time;
159 /* When to start boosting (jiffies). */ 159 /* When to start boosting (jiffies). */
160 struct task_struct *boost_kthread_task; 160 struct task_struct *boost_kthread_task;
161 /* kthread that takes care of priority */ 161 /* kthread that takes care of priority */
162 /* boosting for this rcu_node structure. */ 162 /* boosting for this rcu_node structure. */
163 unsigned int boost_kthread_status; 163 unsigned int boost_kthread_status;
164 /* State of boost_kthread_task for tracing. */ 164 /* State of boost_kthread_task for tracing. */
165 unsigned long n_tasks_boosted; 165 unsigned long n_tasks_boosted;
166 /* Total number of tasks boosted. */ 166 /* Total number of tasks boosted. */
167 unsigned long n_exp_boosts; 167 unsigned long n_exp_boosts;
168 /* Number of tasks boosted for expedited GP. */ 168 /* Number of tasks boosted for expedited GP. */
169 unsigned long n_normal_boosts; 169 unsigned long n_normal_boosts;
170 /* Number of tasks boosted for normal GP. */ 170 /* Number of tasks boosted for normal GP. */
171 unsigned long n_balk_blkd_tasks; 171 unsigned long n_balk_blkd_tasks;
172 /* Refused to boost: no blocked tasks. */ 172 /* Refused to boost: no blocked tasks. */
173 unsigned long n_balk_exp_gp_tasks; 173 unsigned long n_balk_exp_gp_tasks;
174 /* Refused to boost: nothing blocking GP. */ 174 /* Refused to boost: nothing blocking GP. */
175 unsigned long n_balk_boost_tasks; 175 unsigned long n_balk_boost_tasks;
176 /* Refused to boost: already boosting. */ 176 /* Refused to boost: already boosting. */
177 unsigned long n_balk_notblocked; 177 unsigned long n_balk_notblocked;
178 /* Refused to boost: RCU RS CS still running. */ 178 /* Refused to boost: RCU RS CS still running. */
179 unsigned long n_balk_notyet; 179 unsigned long n_balk_notyet;
180 /* Refused to boost: not yet time. */ 180 /* Refused to boost: not yet time. */
181 unsigned long n_balk_nos; 181 unsigned long n_balk_nos;
182 /* Refused to boost: not sure why, though. */ 182 /* Refused to boost: not sure why, though. */
183 /* This can happen due to race conditions. */ 183 /* This can happen due to race conditions. */
184 #endif /* #ifdef CONFIG_RCU_BOOST */ 184 #endif /* #ifdef CONFIG_RCU_BOOST */
185 struct task_struct *node_kthread_task; 185 struct task_struct *node_kthread_task;
186 /* kthread that takes care of this rcu_node */ 186 /* kthread that takes care of this rcu_node */
187 /* structure, for example, awakening the */ 187 /* structure, for example, awakening the */
188 /* per-CPU kthreads as needed. */ 188 /* per-CPU kthreads as needed. */
189 unsigned int node_kthread_status; 189 unsigned int node_kthread_status;
190 /* State of node_kthread_task for tracing. */ 190 /* State of node_kthread_task for tracing. */
191 } ____cacheline_internodealigned_in_smp; 191 } ____cacheline_internodealigned_in_smp;
192 192
193 /* 193 /*
194 * Do a full breadth-first scan of the rcu_node structures for the 194 * Do a full breadth-first scan of the rcu_node structures for the
195 * specified rcu_state structure. 195 * specified rcu_state structure.
196 */ 196 */
197 #define rcu_for_each_node_breadth_first(rsp, rnp) \ 197 #define rcu_for_each_node_breadth_first(rsp, rnp) \
198 for ((rnp) = &(rsp)->node[0]; \ 198 for ((rnp) = &(rsp)->node[0]; \
199 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 199 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
200 200
201 /* 201 /*
202 * Do a breadth-first scan of the non-leaf rcu_node structures for the 202 * Do a breadth-first scan of the non-leaf rcu_node structures for the
203 * specified rcu_state structure. Note that if there is a singleton 203 * specified rcu_state structure. Note that if there is a singleton
204 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 204 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
205 */ 205 */
206 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 206 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
207 for ((rnp) = &(rsp)->node[0]; \ 207 for ((rnp) = &(rsp)->node[0]; \
208 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) 208 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
209 209
210 /* 210 /*
211 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 211 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
212 * structure. Note that if there is a singleton rcu_node tree with but 212 * structure. Note that if there is a singleton rcu_node tree with but
213 * one rcu_node structure, this loop -will- visit the rcu_node structure. 213 * one rcu_node structure, this loop -will- visit the rcu_node structure.
214 * It is still a leaf node, even if it is also the root node. 214 * It is still a leaf node, even if it is also the root node.
215 */ 215 */
216 #define rcu_for_each_leaf_node(rsp, rnp) \ 216 #define rcu_for_each_leaf_node(rsp, rnp) \
217 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ 217 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
218 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) 218 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
219 219
220 /* Index values for nxttail array in struct rcu_data. */ 220 /* Index values for nxttail array in struct rcu_data. */
221 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ 221 #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
222 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ 222 #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
223 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ 223 #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
224 #define RCU_NEXT_TAIL 3 224 #define RCU_NEXT_TAIL 3
225 #define RCU_NEXT_SIZE 4 225 #define RCU_NEXT_SIZE 4
226 226
227 /* Per-CPU data for read-copy update. */ 227 /* Per-CPU data for read-copy update. */
228 struct rcu_data { 228 struct rcu_data {
229 /* 1) quiescent-state and grace-period handling : */ 229 /* 1) quiescent-state and grace-period handling : */
230 unsigned long completed; /* Track rsp->completed gp number */ 230 unsigned long completed; /* Track rsp->completed gp number */
231 /* in order to detect GP end. */ 231 /* in order to detect GP end. */
232 unsigned long gpnum; /* Highest gp number that this CPU */ 232 unsigned long gpnum; /* Highest gp number that this CPU */
233 /* is aware of having started. */ 233 /* is aware of having started. */
234 unsigned long passed_quiesce_gpnum; 234 unsigned long passed_quiesce_gpnum;
235 /* gpnum at time of quiescent state. */ 235 /* gpnum at time of quiescent state. */
236 bool passed_quiesce; /* User-mode/idle loop etc. */ 236 bool passed_quiesce; /* User-mode/idle loop etc. */
237 bool qs_pending; /* Core waits for quiesc state. */ 237 bool qs_pending; /* Core waits for quiesc state. */
238 bool beenonline; /* CPU online at least once. */ 238 bool beenonline; /* CPU online at least once. */
239 bool preemptible; /* Preemptible RCU? */ 239 bool preemptible; /* Preemptible RCU? */
240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
242 242
243 /* 2) batch handling */ 243 /* 2) batch handling */
244 /* 244 /*
245 * If nxtlist is not NULL, it is partitioned as follows. 245 * If nxtlist is not NULL, it is partitioned as follows.
246 * Any of the partitions might be empty, in which case the 246 * Any of the partitions might be empty, in which case the
247 * pointer to that partition will be equal to the pointer for 247 * pointer to that partition will be equal to the pointer for
248 * the following partition. When the list is empty, all of 248 * the following partition. When the list is empty, all of
249 * the nxttail elements point to the ->nxtlist pointer itself, 249 * the nxttail elements point to the ->nxtlist pointer itself,
250 * which in that case is NULL. 250 * which in that case is NULL.
251 * 251 *
252 * [nxtlist, *nxttail[RCU_DONE_TAIL]): 252 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
253 * Entries that batch # <= ->completed 253 * Entries that batch # <= ->completed
254 * The grace period for these entries has completed, and 254 * The grace period for these entries has completed, and
255 * the other grace-period-completed entries may be moved 255 * the other grace-period-completed entries may be moved
256 * here temporarily in rcu_process_callbacks(). 256 * here temporarily in rcu_process_callbacks().
257 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): 257 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
258 * Entries that batch # <= ->completed - 1: waiting for current GP 258 * Entries that batch # <= ->completed - 1: waiting for current GP
259 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): 259 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
260 * Entries known to have arrived before current GP ended 260 * Entries known to have arrived before current GP ended
261 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): 261 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
262 * Entries that might have arrived after current GP ended 262 * Entries that might have arrived after current GP ended
263 * Note that the value of *nxttail[RCU_NEXT_TAIL] will 263 * Note that the value of *nxttail[RCU_NEXT_TAIL] will
264 * always be NULL, as this is the end of the list. 264 * always be NULL, as this is the end of the list.
265 */ 265 */
266 struct rcu_head *nxtlist; 266 struct rcu_head *nxtlist;
267 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 267 struct rcu_head **nxttail[RCU_NEXT_SIZE];
268 long qlen; /* # of queued callbacks */ 268 long qlen; /* # of queued callbacks */
269 long qlen_last_fqs_check; 269 long qlen_last_fqs_check;
270 /* qlen at last check for QS forcing */ 270 /* qlen at last check for QS forcing */
271 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 271 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
272 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ 272 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
273 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ 273 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
274 unsigned long n_force_qs_snap; 274 unsigned long n_force_qs_snap;
275 /* did other CPU force QS recently? */ 275 /* did other CPU force QS recently? */
276 long blimit; /* Upper limit on a processed batch */ 276 long blimit; /* Upper limit on a processed batch */
277 277
278 /* 3) dynticks interface. */ 278 /* 3) dynticks interface. */
279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
280 int dynticks_snap; /* Per-GP tracking for dynticks. */ 280 int dynticks_snap; /* Per-GP tracking for dynticks. */
281 281
282 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 282 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
283 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 283 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
284 unsigned long offline_fqs; /* Kicked due to being offline. */ 284 unsigned long offline_fqs; /* Kicked due to being offline. */
285 unsigned long resched_ipi; /* Sent a resched IPI. */ 285 unsigned long resched_ipi; /* Sent a resched IPI. */
286 286
287 /* 5) __rcu_pending() statistics. */ 287 /* 5) __rcu_pending() statistics. */
288 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ 288 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
289 unsigned long n_rp_qs_pending; 289 unsigned long n_rp_qs_pending;
290 unsigned long n_rp_report_qs; 290 unsigned long n_rp_report_qs;
291 unsigned long n_rp_cb_ready; 291 unsigned long n_rp_cb_ready;
292 unsigned long n_rp_cpu_needs_gp; 292 unsigned long n_rp_cpu_needs_gp;
293 unsigned long n_rp_gp_completed; 293 unsigned long n_rp_gp_completed;
294 unsigned long n_rp_gp_started; 294 unsigned long n_rp_gp_started;
295 unsigned long n_rp_need_fqs; 295 unsigned long n_rp_need_fqs;
296 unsigned long n_rp_need_nothing; 296 unsigned long n_rp_need_nothing;
297 297
298 int cpu; 298 int cpu;
299 struct rcu_state *rsp; 299 struct rcu_state *rsp;
300 }; 300 };
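The ->nxtlist/->nxttail partitioning documented inside this structure is a single linked list carved into four segments by tail pointers, each of which points at the ->next field (or at ->nxtlist itself) that ends its segment. A minimal user-space sketch of that invariant, with illustrative names (not the kernel's callback-handling code):

    #include <stddef.h>
    #include <stdio.h>

    #define RCU_DONE_TAIL           0
    #define RCU_WAIT_TAIL           1
    #define RCU_NEXT_READY_TAIL     2
    #define RCU_NEXT_TAIL           3
    #define RCU_NEXT_SIZE           4

    struct cb {
            struct cb *next;
            int id;
    };

    struct cblist {
            struct cb *head;                        /* analogous to ->nxtlist */
            struct cb **tail[RCU_NEXT_SIZE];        /* analogous to ->nxttail[] */
    };

    /* Empty list: every tail points at the head pointer itself, which is NULL. */
    static void cblist_init(struct cblist *l)
    {
            int i;

            l->head = NULL;
            for (i = 0; i < RCU_NEXT_SIZE; i++)
                    l->tail[i] = &l->head;
    }

    /* New callbacks always go in at the RCU_NEXT end of the list. */
    static void cblist_enqueue(struct cblist *l, struct cb *c)
    {
            c->next = NULL;
            *l->tail[RCU_NEXT_TAIL] = c;
            l->tail[RCU_NEXT_TAIL] = &c->next;
    }

    int main(void)
    {
            struct cblist l;
            struct cb a = { .id = 1 }, b = { .id = 2 };

            cblist_init(&l);
            cblist_enqueue(&l, &a);
            cblist_enqueue(&l, &b);
            /* Both callbacks now sit in the RCU_NEXT segment; grace-period
             * progress would advance them toward RCU_DONE by sliding the
             * tail pointers, never by copying the callbacks themselves. */
            printf("head id %d, RCU_NEXT segment is %s\n", l.head->id,
                   l.tail[RCU_NEXT_READY_TAIL] == l.tail[RCU_NEXT_TAIL] ?
                   "empty" : "non-empty");
            return 0;
    }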
301 301
302 /* Values for fqs_state field in struct rcu_state. */ 302 /* Values for fqs_state field in struct rcu_state. */
303 #define RCU_GP_IDLE 0 /* No grace period in progress. */ 303 #define RCU_GP_IDLE 0 /* No grace period in progress. */
304 #define RCU_GP_INIT 1 /* Grace period being initialized. */ 304 #define RCU_GP_INIT 1 /* Grace period being initialized. */
305 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ 305 #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
306 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 306 #define RCU_FORCE_QS 3 /* Need to force quiescent state. */
307 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 307 #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
308 308
309 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 309 #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
310 310
311 #ifdef CONFIG_PROVE_RCU 311 #ifdef CONFIG_PROVE_RCU
312 #define RCU_STALL_DELAY_DELTA (5 * HZ) 312 #define RCU_STALL_DELAY_DELTA (5 * HZ)
313 #else 313 #else
314 #define RCU_STALL_DELAY_DELTA 0 314 #define RCU_STALL_DELAY_DELTA 0
315 #endif 315 #endif
316 316
317 #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ 317 #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
318 RCU_STALL_DELAY_DELTA) 318 RCU_STALL_DELAY_DELTA)
319 /* for rsp->jiffies_stall */ 319 /* for rsp->jiffies_stall */
320 #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) 320 #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
321 /* for rsp->jiffies_stall */ 321 /* for rsp->jiffies_stall */
322 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 322 #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
323 /* to take at least one */ 323 /* to take at least one */
324 /* scheduling clock irq */ 324 /* scheduling clock irq */
325 /* before ratting on them. */ 325 /* before ratting on them. */
326 326
327 #define rcu_wait(cond) \ 327 #define rcu_wait(cond) \
328 do { \ 328 do { \
329 for (;;) { \ 329 for (;;) { \
330 set_current_state(TASK_INTERRUPTIBLE); \ 330 set_current_state(TASK_INTERRUPTIBLE); \
331 if (cond) \ 331 if (cond) \
332 break; \ 332 break; \
333 schedule(); \ 333 schedule(); \
334 } \ 334 } \
335 __set_current_state(TASK_RUNNING); \ 335 __set_current_state(TASK_RUNNING); \
336 } while (0) 336 } while (0)
337 337
338 /* 338 /*
339 * RCU global state, including node hierarchy. This hierarchy is 339 * RCU global state, including node hierarchy. This hierarchy is
340 * represented in "heap" form in a dense array. The root (first level) 340 * represented in "heap" form in a dense array. The root (first level)
341 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second 341 * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
342 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), 342 * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
343 * and the third level in ->node[m+1] and following (->node[m+1] referenced 343 * and the third level in ->node[m+1] and following (->node[m+1] referenced
344 * by ->level[2]). The number of levels is determined by the number of 344 * by ->level[2]). The number of levels is determined by the number of
345 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" 345 * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy"
346 * consisting of a single rcu_node. 346 * consisting of a single rcu_node.
347 */ 347 */
348 struct rcu_state { 348 struct rcu_state {
349 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ 349 struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */
350 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ 350 struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
351 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ 351 u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
352 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ 352 u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
353 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ 353 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
354 354
355 /* The following fields are guarded by the root rcu_node's lock. */ 355 /* The following fields are guarded by the root rcu_node's lock. */
356 356
357 u8 fqs_state ____cacheline_internodealigned_in_smp; 357 u8 fqs_state ____cacheline_internodealigned_in_smp;
358 /* Force QS state. */ 358 /* Force QS state. */
359 u8 fqs_active; /* force_quiescent_state() */ 359 u8 fqs_active; /* force_quiescent_state() */
360 /* is running. */ 360 /* is running. */
361 u8 fqs_need_gp; /* A CPU was prevented from */ 361 u8 fqs_need_gp; /* A CPU was prevented from */
362 /* starting a new grace */ 362 /* starting a new grace */
363 /* period because */ 363 /* period because */
364 /* force_quiescent_state() */ 364 /* force_quiescent_state() */
365 /* was running. */ 365 /* was running. */
366 u8 boost; /* Subject to priority boost. */ 366 u8 boost; /* Subject to priority boost. */
367 unsigned long gpnum; /* Current gp number. */ 367 unsigned long gpnum; /* Current gp number. */
368 unsigned long completed; /* # of last completed gp. */ 368 unsigned long completed; /* # of last completed gp. */
369 369
370 /* End of fields guarded by root rcu_node's lock. */ 370 /* End of fields guarded by root rcu_node's lock. */
371 371
372 raw_spinlock_t onofflock; /* exclude on/offline and */ 372 raw_spinlock_t onofflock; /* exclude on/offline and */
373 /* starting new GP. */ 373 /* starting new GP. */
374 raw_spinlock_t fqslock; /* Only one task forcing */ 374 raw_spinlock_t fqslock; /* Only one task forcing */
375 /* quiescent states. */ 375 /* quiescent states. */
376 unsigned long jiffies_force_qs; /* Time at which to invoke */ 376 unsigned long jiffies_force_qs; /* Time at which to invoke */
377 /* force_quiescent_state(). */ 377 /* force_quiescent_state(). */
378 unsigned long n_force_qs; /* Number of calls to */ 378 unsigned long n_force_qs; /* Number of calls to */
379 /* force_quiescent_state(). */ 379 /* force_quiescent_state(). */
380 unsigned long n_force_qs_lh; /* ~Number of calls leaving */ 380 unsigned long n_force_qs_lh; /* ~Number of calls leaving */
381 /* due to lock unavailable. */ 381 /* due to lock unavailable. */
382 unsigned long n_force_qs_ngp; /* Number of calls leaving */ 382 unsigned long n_force_qs_ngp; /* Number of calls leaving */
383 /* due to no GP active. */ 383 /* due to no GP active. */
384 unsigned long gp_start; /* Time at which GP started, */ 384 unsigned long gp_start; /* Time at which GP started, */
385 /* but in jiffies. */ 385 /* but in jiffies. */
386 unsigned long jiffies_stall; /* Time at which to check */ 386 unsigned long jiffies_stall; /* Time at which to check */
387 /* for CPU stalls. */ 387 /* for CPU stalls. */
388 unsigned long gp_max; /* Maximum GP duration in */ 388 unsigned long gp_max; /* Maximum GP duration in */
389 /* jiffies. */ 389 /* jiffies. */
390 char *name; /* Name of structure. */ 390 char *name; /* Name of structure. */
391 }; 391 };
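Continuing the illustrative 64-CPU, fanout-16 geometry from earlier: the dense ->node[] array described in the comment above then holds five elements, with ->node[0] as the root (referenced by ->level[0]) and ->node[1] through ->node[4] as the four leaves (referenced by ->level[1]). rcu_for_each_node_breadth_first() walks all five, while rcu_for_each_leaf_node() starts at ->level[NUM_RCU_LVLS - 1], i.e. ->node[1], and visits only the leaves.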
392 392
393 /* Return values for rcu_preempt_offline_tasks(). */ 393 /* Return values for rcu_preempt_offline_tasks(). */
394 394
395 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ 395 #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */
396 /* GP were moved to root. */ 396 /* GP were moved to root. */
397 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ 397 #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
398 /* GP were moved to root. */ 398 /* GP were moved to root. */
399 399
400 /* 400 /*
401 * RCU implementation internal declarations: 401 * RCU implementation internal declarations:
402 */ 402 */
403 extern struct rcu_state rcu_sched_state; 403 extern struct rcu_state rcu_sched_state;
404 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); 404 DECLARE_PER_CPU(struct rcu_data, rcu_sched_data);
405 405
406 extern struct rcu_state rcu_bh_state; 406 extern struct rcu_state rcu_bh_state;
407 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); 407 DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
408 408
409 #ifdef CONFIG_TREE_PREEMPT_RCU 409 #ifdef CONFIG_TREE_PREEMPT_RCU
410 extern struct rcu_state rcu_preempt_state; 410 extern struct rcu_state rcu_preempt_state;
411 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 411 DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
412 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 412 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
413 413
414 #ifdef CONFIG_RCU_BOOST 414 #ifdef CONFIG_RCU_BOOST
415 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 415 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
416 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); 416 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
417 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); 417 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
418 DECLARE_PER_CPU(char, rcu_cpu_has_work); 418 DECLARE_PER_CPU(char, rcu_cpu_has_work);
419 #endif /* #ifdef CONFIG_RCU_BOOST */ 419 #endif /* #ifdef CONFIG_RCU_BOOST */
420 420
421 #ifndef RCU_TREE_NONCORE 421 #ifndef RCU_TREE_NONCORE
422 422
423 /* Forward declarations for rcutree_plugin.h */ 423 /* Forward declarations for rcutree_plugin.h */
424 static void rcu_bootup_announce(void); 424 static void rcu_bootup_announce(void);
425 long rcu_batches_completed(void); 425 long rcu_batches_completed(void);
426 static void rcu_preempt_note_context_switch(int cpu); 426 static void rcu_preempt_note_context_switch(int cpu);
427 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 427 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
428 #ifdef CONFIG_HOTPLUG_CPU 428 #ifdef CONFIG_HOTPLUG_CPU
429 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, 429 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
430 unsigned long flags); 430 unsigned long flags);
431 static void rcu_stop_cpu_kthread(int cpu); 431 static void rcu_stop_cpu_kthread(int cpu);
432 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 432 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
433 static void rcu_print_detail_task_stall(struct rcu_state *rsp); 433 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
434 static int rcu_print_task_stall(struct rcu_node *rnp); 434 static int rcu_print_task_stall(struct rcu_node *rnp);
435 static void rcu_preempt_stall_reset(void); 435 static void rcu_preempt_stall_reset(void);
436 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 436 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
437 #ifdef CONFIG_HOTPLUG_CPU 437 #ifdef CONFIG_HOTPLUG_CPU
438 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 438 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
439 struct rcu_node *rnp, 439 struct rcu_node *rnp,
440 struct rcu_data *rdp); 440 struct rcu_data *rdp);
441 static void rcu_preempt_offline_cpu(int cpu); 441 static void rcu_preempt_offline_cpu(int cpu);
442 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 442 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
443 static void rcu_preempt_check_callbacks(int cpu); 443 static void rcu_preempt_check_callbacks(int cpu);
444 static void rcu_preempt_process_callbacks(void); 444 static void rcu_preempt_process_callbacks(void);
445 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 445 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
446 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) 446 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
447 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 447 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
448 bool wake); 448 bool wake);
449 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 449 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
450 static int rcu_preempt_pending(int cpu); 450 static int rcu_preempt_pending(int cpu);
451 static int rcu_preempt_needs_cpu(int cpu); 451 static int rcu_preempt_needs_cpu(int cpu);
452 static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 452 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
453 static void rcu_preempt_send_cbs_to_online(void); 453 static void rcu_preempt_send_cbs_to_online(void);
454 static void __init __rcu_init_preempt(void); 454 static void __init __rcu_init_preempt(void);
455 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 455 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
456 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 456 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
457 static void invoke_rcu_callbacks_kthread(void); 457 static void invoke_rcu_callbacks_kthread(void);
458 #ifdef CONFIG_RCU_BOOST 458 #ifdef CONFIG_RCU_BOOST
459 static void rcu_preempt_do_callbacks(void); 459 static void rcu_preempt_do_callbacks(void);
460 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 460 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
461 cpumask_var_t cm); 461 cpumask_var_t cm);
462 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 462 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
463 struct rcu_node *rnp, 463 struct rcu_node *rnp,
464 int rnp_index); 464 int rnp_index);
465 static void invoke_rcu_node_kthread(struct rcu_node *rnp); 465 static void invoke_rcu_node_kthread(struct rcu_node *rnp);
466 static void rcu_yield(void (*f)(unsigned long), unsigned long arg); 466 static void rcu_yield(void (*f)(unsigned long), unsigned long arg);
467 #endif /* #ifdef CONFIG_RCU_BOOST */ 467 #endif /* #ifdef CONFIG_RCU_BOOST */
468 static void rcu_cpu_kthread_setrt(int cpu, int to_rt); 468 static void rcu_cpu_kthread_setrt(int cpu, int to_rt);
469 static void __cpuinit rcu_prepare_kthreads(int cpu); 469 static void __cpuinit rcu_prepare_kthreads(int cpu);
470 static void rcu_prepare_for_idle_init(int cpu);
471 static void rcu_cleanup_after_idle(int cpu);
470 static void rcu_prepare_for_idle(int cpu); 472 static void rcu_prepare_for_idle(int cpu);
471 473
472 #endif /* #ifndef RCU_TREE_NONCORE */ 474 #endif /* #ifndef RCU_TREE_NONCORE */
473 475
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/stop_machine.h> 28 #include <linux/stop_machine.h>
29 29
30 #define RCU_KTHREAD_PRIO 1 30 #define RCU_KTHREAD_PRIO 1
31 31
32 #ifdef CONFIG_RCU_BOOST 32 #ifdef CONFIG_RCU_BOOST
33 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO 33 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
34 #else 34 #else
35 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO 35 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
36 #endif 36 #endif
37 37
38 /* 38 /*
39 * Check the RCU kernel configuration parameters and print informative 39 * Check the RCU kernel configuration parameters and print informative
40 * messages about anything out of the ordinary. If you like #ifdef, you 40 * messages about anything out of the ordinary. If you like #ifdef, you
41 * will love this function. 41 * will love this function.
42 */ 42 */
43 static void __init rcu_bootup_announce_oddness(void) 43 static void __init rcu_bootup_announce_oddness(void)
44 { 44 {
45 #ifdef CONFIG_RCU_TRACE 45 #ifdef CONFIG_RCU_TRACE
46 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); 46 printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
47 #endif 47 #endif
48 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 48 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
49 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 49 printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
50 CONFIG_RCU_FANOUT); 50 CONFIG_RCU_FANOUT);
51 #endif 51 #endif
52 #ifdef CONFIG_RCU_FANOUT_EXACT 52 #ifdef CONFIG_RCU_FANOUT_EXACT
53 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); 53 printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
54 #endif 54 #endif
55 #ifdef CONFIG_RCU_FAST_NO_HZ 55 #ifdef CONFIG_RCU_FAST_NO_HZ
56 printk(KERN_INFO 56 printk(KERN_INFO
57 "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 57 "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
58 #endif 58 #endif
59 #ifdef CONFIG_PROVE_RCU 59 #ifdef CONFIG_PROVE_RCU
60 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); 60 printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
61 #endif 61 #endif
62 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 62 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
63 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 63 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
64 #endif 64 #endif
65 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 65 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
66 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 66 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
67 #endif 67 #endif
68 #if NUM_RCU_LVL_4 != 0 68 #if NUM_RCU_LVL_4 != 0
69 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 69 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
70 #endif 70 #endif
71 } 71 }
72 72
73 #ifdef CONFIG_TREE_PREEMPT_RCU 73 #ifdef CONFIG_TREE_PREEMPT_RCU
74 74
75 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); 75 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
76 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 76 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
77 static struct rcu_state *rcu_state = &rcu_preempt_state; 77 static struct rcu_state *rcu_state = &rcu_preempt_state;
78 78
79 static void rcu_read_unlock_special(struct task_struct *t); 79 static void rcu_read_unlock_special(struct task_struct *t);
80 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 80 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
81 81
82 /* 82 /*
83 * Tell them what RCU they are running. 83 * Tell them what RCU they are running.
84 */ 84 */
85 static void __init rcu_bootup_announce(void) 85 static void __init rcu_bootup_announce(void)
86 { 86 {
87 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); 87 printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
88 rcu_bootup_announce_oddness(); 88 rcu_bootup_announce_oddness();
89 } 89 }
90 90
91 /* 91 /*
92 * Return the number of RCU-preempt batches processed thus far 92 * Return the number of RCU-preempt batches processed thus far
93 * for debug and statistics. 93 * for debug and statistics.
94 */ 94 */
95 long rcu_batches_completed_preempt(void) 95 long rcu_batches_completed_preempt(void)
96 { 96 {
97 return rcu_preempt_state.completed; 97 return rcu_preempt_state.completed;
98 } 98 }
99 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 99 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
100 100
101 /* 101 /*
102 * Return the number of RCU batches processed thus far for debug & stats. 102 * Return the number of RCU batches processed thus far for debug & stats.
103 */ 103 */
104 long rcu_batches_completed(void) 104 long rcu_batches_completed(void)
105 { 105 {
106 return rcu_batches_completed_preempt(); 106 return rcu_batches_completed_preempt();
107 } 107 }
108 EXPORT_SYMBOL_GPL(rcu_batches_completed); 108 EXPORT_SYMBOL_GPL(rcu_batches_completed);
109 109
110 /* 110 /*
111 * Force a quiescent state for preemptible RCU. 111 * Force a quiescent state for preemptible RCU.
112 */ 112 */
113 void rcu_force_quiescent_state(void) 113 void rcu_force_quiescent_state(void)
114 { 114 {
115 force_quiescent_state(&rcu_preempt_state, 0); 115 force_quiescent_state(&rcu_preempt_state, 0);
116 } 116 }
117 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 117 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
118 118
119 /* 119 /*
120 * Record a preemptible-RCU quiescent state for the specified CPU. Note 120 * Record a preemptible-RCU quiescent state for the specified CPU. Note
121 * that this just means that the task currently running on the CPU is 121 * that this just means that the task currently running on the CPU is
122 * not in an RCU read-side critical section. There might be any number 122 * not in an RCU read-side critical section. There might be any number
123 * of tasks blocked while in an RCU read-side critical section. 123 * of tasks blocked while in an RCU read-side critical section.
124 * 124 *
125 * Unlike the other rcu_*_qs() functions, callers to this function 125 * Unlike the other rcu_*_qs() functions, callers to this function
126 * must disable irqs in order to protect the assignment to 126 * must disable irqs in order to protect the assignment to
127 * ->rcu_read_unlock_special. 127 * ->rcu_read_unlock_special.
128 */ 128 */
129 static void rcu_preempt_qs(int cpu) 129 static void rcu_preempt_qs(int cpu)
130 { 130 {
131 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 131 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
132 132
133 rdp->passed_quiesce_gpnum = rdp->gpnum; 133 rdp->passed_quiesce_gpnum = rdp->gpnum;
134 barrier(); 134 barrier();
135 if (rdp->passed_quiesce == 0) 135 if (rdp->passed_quiesce == 0)
136 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); 136 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
137 rdp->passed_quiesce = 1; 137 rdp->passed_quiesce = 1;
138 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 138 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
139 } 139 }
140 140
141 /* 141 /*
142 * We have entered the scheduler, and the current task might soon be 142 * We have entered the scheduler, and the current task might soon be
143 * context-switched away from. If this task is in an RCU read-side 143 * context-switched away from. If this task is in an RCU read-side
144 * critical section, we will no longer be able to rely on the CPU to 144 * critical section, we will no longer be able to rely on the CPU to
145 * record that fact, so we enqueue the task on the blkd_tasks list. 145 * record that fact, so we enqueue the task on the blkd_tasks list.
146 * The task will dequeue itself when it exits the outermost enclosing 146 * The task will dequeue itself when it exits the outermost enclosing
147 * RCU read-side critical section. Therefore, the current grace period 147 * RCU read-side critical section. Therefore, the current grace period
148 * cannot be permitted to complete until the blkd_tasks list entries 148 * cannot be permitted to complete until the blkd_tasks list entries
149 * predating the current grace period drain, in other words, until 149 * predating the current grace period drain, in other words, until
150 * rnp->gp_tasks becomes NULL. 150 * rnp->gp_tasks becomes NULL.
151 * 151 *
152 * Caller must disable preemption. 152 * Caller must disable preemption.
153 */ 153 */
154 static void rcu_preempt_note_context_switch(int cpu) 154 static void rcu_preempt_note_context_switch(int cpu)
155 { 155 {
156 struct task_struct *t = current; 156 struct task_struct *t = current;
157 unsigned long flags; 157 unsigned long flags;
158 struct rcu_data *rdp; 158 struct rcu_data *rdp;
159 struct rcu_node *rnp; 159 struct rcu_node *rnp;
160 160
161 if (t->rcu_read_lock_nesting > 0 && 161 if (t->rcu_read_lock_nesting > 0 &&
162 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 162 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
163 163
164 /* Possibly blocking in an RCU read-side critical section. */ 164 /* Possibly blocking in an RCU read-side critical section. */
165 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 165 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
166 rnp = rdp->mynode; 166 rnp = rdp->mynode;
167 raw_spin_lock_irqsave(&rnp->lock, flags); 167 raw_spin_lock_irqsave(&rnp->lock, flags);
168 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 168 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
169 t->rcu_blocked_node = rnp; 169 t->rcu_blocked_node = rnp;
170 170
171 /* 171 /*
172 * If this CPU has already checked in, then this task 172 * If this CPU has already checked in, then this task
173 * will hold up the next grace period rather than the 173 * will hold up the next grace period rather than the
174 * current grace period. Queue the task accordingly. 174 * current grace period. Queue the task accordingly.
175 * If the task is queued for the current grace period 175 * If the task is queued for the current grace period
176 * (i.e., this CPU has not yet passed through a quiescent 176 * (i.e., this CPU has not yet passed through a quiescent
177 * state for the current grace period), then as long 177 * state for the current grace period), then as long
178 * as that task remains queued, the current grace period 178 * as that task remains queued, the current grace period
179 * cannot end. Note that there is some uncertainty as 179 * cannot end. Note that there is some uncertainty as
180 * to exactly when the current grace period started. 180 * to exactly when the current grace period started.
181 * We take a conservative approach, which can result 181 * We take a conservative approach, which can result
182 * in unnecessarily waiting on tasks that started very 182 * in unnecessarily waiting on tasks that started very
183 * slightly after the current grace period began. C'est 183 * slightly after the current grace period began. C'est
184 * la vie!!! 184 * la vie!!!
185 * 185 *
186 * But first, note that the current CPU must still be 186 * But first, note that the current CPU must still be
187 * on line! 187 * on line!
188 */ 188 */
189 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 189 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
190 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 190 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
191 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 191 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
192 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 192 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
193 rnp->gp_tasks = &t->rcu_node_entry; 193 rnp->gp_tasks = &t->rcu_node_entry;
194 #ifdef CONFIG_RCU_BOOST 194 #ifdef CONFIG_RCU_BOOST
195 if (rnp->boost_tasks != NULL) 195 if (rnp->boost_tasks != NULL)
196 rnp->boost_tasks = rnp->gp_tasks; 196 rnp->boost_tasks = rnp->gp_tasks;
197 #endif /* #ifdef CONFIG_RCU_BOOST */ 197 #endif /* #ifdef CONFIG_RCU_BOOST */
198 } else { 198 } else {
199 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 199 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
200 if (rnp->qsmask & rdp->grpmask) 200 if (rnp->qsmask & rdp->grpmask)
201 rnp->gp_tasks = &t->rcu_node_entry; 201 rnp->gp_tasks = &t->rcu_node_entry;
202 } 202 }
203 trace_rcu_preempt_task(rdp->rsp->name, 203 trace_rcu_preempt_task(rdp->rsp->name,
204 t->pid, 204 t->pid,
205 (rnp->qsmask & rdp->grpmask) 205 (rnp->qsmask & rdp->grpmask)
206 ? rnp->gpnum 206 ? rnp->gpnum
207 : rnp->gpnum + 1); 207 : rnp->gpnum + 1);
208 raw_spin_unlock_irqrestore(&rnp->lock, flags); 208 raw_spin_unlock_irqrestore(&rnp->lock, flags);
209 } else if (t->rcu_read_lock_nesting < 0 && 209 } else if (t->rcu_read_lock_nesting < 0 &&
210 t->rcu_read_unlock_special) { 210 t->rcu_read_unlock_special) {
211 211
212 /* 212 /*
213 * Complete exit from RCU read-side critical section on 213 * Complete exit from RCU read-side critical section on
214 * behalf of preempted instance of __rcu_read_unlock(). 214 * behalf of preempted instance of __rcu_read_unlock().
215 */ 215 */
216 rcu_read_unlock_special(t); 216 rcu_read_unlock_special(t);
217 } 217 }
218 218
219 /* 219 /*
220 * Either we were not in an RCU read-side critical section to 220 * Either we were not in an RCU read-side critical section to
221 * begin with, or we have now recorded that critical section 221 * begin with, or we have now recorded that critical section
222 * globally. Either way, we can now note a quiescent state 222 * globally. Either way, we can now note a quiescent state
223 * for this CPU. Again, if we were in an RCU read-side critical 223 * for this CPU. Again, if we were in an RCU read-side critical
224 * section, and if that critical section was blocking the current 224 * section, and if that critical section was blocking the current
225 * grace period, then the fact that the task has been enqueued 225 * grace period, then the fact that the task has been enqueued
226 * means that we continue to block the current grace period. 226 * means that we continue to block the current grace period.
227 */ 227 */
228 local_irq_save(flags); 228 local_irq_save(flags);
229 rcu_preempt_qs(cpu); 229 rcu_preempt_qs(cpu);
230 local_irq_restore(flags); 230 local_irq_restore(flags);
231 } 231 }
232 232
233 /* 233 /*
234 * Tree-preemptible RCU implementation for rcu_read_lock(). 234 * Tree-preemptible RCU implementation for rcu_read_lock().
235 * Just increment ->rcu_read_lock_nesting; shared state will be updated 235 * Just increment ->rcu_read_lock_nesting; shared state will be updated
236 * if we block. 236 * if we block.
237 */ 237 */
238 void __rcu_read_lock(void) 238 void __rcu_read_lock(void)
239 { 239 {
240 current->rcu_read_lock_nesting++; 240 current->rcu_read_lock_nesting++;
241 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ 241 barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
242 } 242 }
243 EXPORT_SYMBOL_GPL(__rcu_read_lock); 243 EXPORT_SYMBOL_GPL(__rcu_read_lock);
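For illustration (not part of this patch), a minimal sketch of the nesting behavior that __rcu_read_lock() and the matching __rcu_read_unlock() below implement; only the outermost rcu_read_unlock() triggers the special-case handling:

	rcu_read_lock();	/* __rcu_read_lock(): ->rcu_read_lock_nesting 0 -> 1 */
	rcu_read_lock();	/* nested read-side critical section: 1 -> 2 */
	/* ... accesses via rcu_dereference() remain safe here ... */
	rcu_read_unlock();	/* 2 -> 1, no special processing */
	rcu_read_unlock();	/* outermost: ->rcu_read_unlock_special is checked */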
244 244
245 /* 245 /*
246 * Check for preempted RCU readers blocking the current grace period 246 * Check for preempted RCU readers blocking the current grace period
247 * for the specified rcu_node structure. If the caller needs a reliable 247 * for the specified rcu_node structure. If the caller needs a reliable
248 * answer, it must hold the rcu_node's ->lock. 248 * answer, it must hold the rcu_node's ->lock.
249 */ 249 */
250 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 250 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
251 { 251 {
252 return rnp->gp_tasks != NULL; 252 return rnp->gp_tasks != NULL;
253 } 253 }
254 254
255 /* 255 /*
256 * Record a quiescent state for all tasks that were previously queued 256 * Record a quiescent state for all tasks that were previously queued
257 * on the specified rcu_node structure and that were blocking the current 257 * on the specified rcu_node structure and that were blocking the current
258 * RCU grace period. The caller must hold the specified rnp->lock with 258 * RCU grace period. The caller must hold the specified rnp->lock with
259 * irqs disabled, and this lock is released upon return, but irqs remain 259 * irqs disabled, and this lock is released upon return, but irqs remain
260 * disabled. 260 * disabled.
261 */ 261 */
262 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 262 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
263 __releases(rnp->lock) 263 __releases(rnp->lock)
264 { 264 {
265 unsigned long mask; 265 unsigned long mask;
266 struct rcu_node *rnp_p; 266 struct rcu_node *rnp_p;
267 267
268 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 268 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
269 raw_spin_unlock_irqrestore(&rnp->lock, flags); 269 raw_spin_unlock_irqrestore(&rnp->lock, flags);
270 return; /* Still need more quiescent states! */ 270 return; /* Still need more quiescent states! */
271 } 271 }
272 272
273 rnp_p = rnp->parent; 273 rnp_p = rnp->parent;
274 if (rnp_p == NULL) { 274 if (rnp_p == NULL) {
275 /* 275 /*
276 * Either there is only one rcu_node in the tree, 276 * Either there is only one rcu_node in the tree,
277 * or tasks were kicked up to root rcu_node due to 277 * or tasks were kicked up to root rcu_node due to
278 * CPUs going offline. 278 * CPUs going offline.
279 */ 279 */
280 rcu_report_qs_rsp(&rcu_preempt_state, flags); 280 rcu_report_qs_rsp(&rcu_preempt_state, flags);
281 return; 281 return;
282 } 282 }
283 283
284 /* Report up the rest of the hierarchy. */ 284 /* Report up the rest of the hierarchy. */
285 mask = rnp->grpmask; 285 mask = rnp->grpmask;
286 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 286 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
287 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 287 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
288 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 288 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
289 } 289 }
290 290
291 /* 291 /*
292 * Advance a ->blkd_tasks-list pointer to the next entry, returning 292 * Advance a ->blkd_tasks-list pointer to the next entry, returning
293 * NULL instead if at the end of the list. 293 * NULL instead if at the end of the list.
294 */ 294 */
295 static struct list_head *rcu_next_node_entry(struct task_struct *t, 295 static struct list_head *rcu_next_node_entry(struct task_struct *t,
296 struct rcu_node *rnp) 296 struct rcu_node *rnp)
297 { 297 {
298 struct list_head *np; 298 struct list_head *np;
299 299
300 np = t->rcu_node_entry.next; 300 np = t->rcu_node_entry.next;
301 if (np == &rnp->blkd_tasks) 301 if (np == &rnp->blkd_tasks)
302 np = NULL; 302 np = NULL;
303 return np; 303 return np;
304 } 304 }
305 305
306 /* 306 /*
307 * Handle special cases during rcu_read_unlock(), such as needing to 307 * Handle special cases during rcu_read_unlock(), such as needing to
308 * notify RCU core processing or to clean up after a task that blocked 308 * notify RCU core processing or to clean up after a task that blocked
309 * during the RCU read-side critical section. 309 * during the RCU read-side critical section.
310 */ 310 */
311 static noinline void rcu_read_unlock_special(struct task_struct *t) 311 static noinline void rcu_read_unlock_special(struct task_struct *t)
312 { 312 {
313 int empty; 313 int empty;
314 int empty_exp; 314 int empty_exp;
315 int empty_exp_now; 315 int empty_exp_now;
316 unsigned long flags; 316 unsigned long flags;
317 struct list_head *np; 317 struct list_head *np;
318 #ifdef CONFIG_RCU_BOOST 318 #ifdef CONFIG_RCU_BOOST
319 struct rt_mutex *rbmp = NULL; 319 struct rt_mutex *rbmp = NULL;
320 #endif /* #ifdef CONFIG_RCU_BOOST */ 320 #endif /* #ifdef CONFIG_RCU_BOOST */
321 struct rcu_node *rnp; 321 struct rcu_node *rnp;
322 int special; 322 int special;
323 323
324 /* NMI handlers cannot block and cannot safely manipulate state. */ 324 /* NMI handlers cannot block and cannot safely manipulate state. */
325 if (in_nmi()) 325 if (in_nmi())
326 return; 326 return;
327 327
328 local_irq_save(flags); 328 local_irq_save(flags);
329 329
330 /* 330 /*
331 * If RCU core is waiting for this CPU to exit critical section, 331 * If RCU core is waiting for this CPU to exit critical section,
332 * let it know that we have done so. 332 * let it know that we have done so.
333 */ 333 */
334 special = t->rcu_read_unlock_special; 334 special = t->rcu_read_unlock_special;
335 if (special & RCU_READ_UNLOCK_NEED_QS) { 335 if (special & RCU_READ_UNLOCK_NEED_QS) {
336 rcu_preempt_qs(smp_processor_id()); 336 rcu_preempt_qs(smp_processor_id());
337 } 337 }
338 338
339 /* Hardware IRQ handlers cannot block. */ 339 /* Hardware IRQ handlers cannot block. */
340 if (in_irq() || in_serving_softirq()) { 340 if (in_irq() || in_serving_softirq()) {
341 local_irq_restore(flags); 341 local_irq_restore(flags);
342 return; 342 return;
343 } 343 }
344 344
345 /* Clean up if blocked during RCU read-side critical section. */ 345 /* Clean up if blocked during RCU read-side critical section. */
346 if (special & RCU_READ_UNLOCK_BLOCKED) { 346 if (special & RCU_READ_UNLOCK_BLOCKED) {
347 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 347 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
348 348
349 /* 349 /*
350 * Remove this task from the list it blocked on. The 350 * Remove this task from the list it blocked on. The
351 * task can migrate while we acquire the lock, but at 351 * task can migrate while we acquire the lock, but at
352 * most one time. So at most two passes through loop. 352 * most one time. So at most two passes through loop.
353 */ 353 */
354 for (;;) { 354 for (;;) {
355 rnp = t->rcu_blocked_node; 355 rnp = t->rcu_blocked_node;
356 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 356 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
357 if (rnp == t->rcu_blocked_node) 357 if (rnp == t->rcu_blocked_node)
358 break; 358 break;
359 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 359 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
360 } 360 }
361 empty = !rcu_preempt_blocked_readers_cgp(rnp); 361 empty = !rcu_preempt_blocked_readers_cgp(rnp);
362 empty_exp = !rcu_preempted_readers_exp(rnp); 362 empty_exp = !rcu_preempted_readers_exp(rnp);
363 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 363 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
364 np = rcu_next_node_entry(t, rnp); 364 np = rcu_next_node_entry(t, rnp);
365 list_del_init(&t->rcu_node_entry); 365 list_del_init(&t->rcu_node_entry);
366 t->rcu_blocked_node = NULL; 366 t->rcu_blocked_node = NULL;
367 trace_rcu_unlock_preempted_task("rcu_preempt", 367 trace_rcu_unlock_preempted_task("rcu_preempt",
368 rnp->gpnum, t->pid); 368 rnp->gpnum, t->pid);
369 if (&t->rcu_node_entry == rnp->gp_tasks) 369 if (&t->rcu_node_entry == rnp->gp_tasks)
370 rnp->gp_tasks = np; 370 rnp->gp_tasks = np;
371 if (&t->rcu_node_entry == rnp->exp_tasks) 371 if (&t->rcu_node_entry == rnp->exp_tasks)
372 rnp->exp_tasks = np; 372 rnp->exp_tasks = np;
373 #ifdef CONFIG_RCU_BOOST 373 #ifdef CONFIG_RCU_BOOST
374 if (&t->rcu_node_entry == rnp->boost_tasks) 374 if (&t->rcu_node_entry == rnp->boost_tasks)
375 rnp->boost_tasks = np; 375 rnp->boost_tasks = np;
376 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ 376 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
377 if (t->rcu_boost_mutex) { 377 if (t->rcu_boost_mutex) {
378 rbmp = t->rcu_boost_mutex; 378 rbmp = t->rcu_boost_mutex;
379 t->rcu_boost_mutex = NULL; 379 t->rcu_boost_mutex = NULL;
380 } 380 }
381 #endif /* #ifdef CONFIG_RCU_BOOST */ 381 #endif /* #ifdef CONFIG_RCU_BOOST */
382 382
383 /* 383 /*
384 * If this was the last task on the current list, and if 384 * If this was the last task on the current list, and if
385 * we aren't waiting on any CPUs, report the quiescent state. 385 * we aren't waiting on any CPUs, report the quiescent state.
386 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 386 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
387 * so we must take a snapshot of the expedited state. 387 * so we must take a snapshot of the expedited state.
388 */ 388 */
389 empty_exp_now = !rcu_preempted_readers_exp(rnp); 389 empty_exp_now = !rcu_preempted_readers_exp(rnp);
390 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 390 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
391 trace_rcu_quiescent_state_report("preempt_rcu", 391 trace_rcu_quiescent_state_report("preempt_rcu",
392 rnp->gpnum, 392 rnp->gpnum,
393 0, rnp->qsmask, 393 0, rnp->qsmask,
394 rnp->level, 394 rnp->level,
395 rnp->grplo, 395 rnp->grplo,
396 rnp->grphi, 396 rnp->grphi,
397 !!rnp->gp_tasks); 397 !!rnp->gp_tasks);
398 rcu_report_unblock_qs_rnp(rnp, flags); 398 rcu_report_unblock_qs_rnp(rnp, flags);
399 } else 399 } else
400 raw_spin_unlock_irqrestore(&rnp->lock, flags); 400 raw_spin_unlock_irqrestore(&rnp->lock, flags);
401 401
402 #ifdef CONFIG_RCU_BOOST 402 #ifdef CONFIG_RCU_BOOST
403 /* Unboost if we were boosted. */ 403 /* Unboost if we were boosted. */
404 if (rbmp) 404 if (rbmp)
405 rt_mutex_unlock(rbmp); 405 rt_mutex_unlock(rbmp);
406 #endif /* #ifdef CONFIG_RCU_BOOST */ 406 #endif /* #ifdef CONFIG_RCU_BOOST */
407 407
408 /* 408 /*
409 * If this was the last task on the expedited lists, 409 * If this was the last task on the expedited lists,
410 * then we need to report up the rcu_node hierarchy. 410 * then we need to report up the rcu_node hierarchy.
411 */ 411 */
412 if (!empty_exp && empty_exp_now) 412 if (!empty_exp && empty_exp_now)
413 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); 413 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
414 } else { 414 } else {
415 local_irq_restore(flags); 415 local_irq_restore(flags);
416 } 416 }
417 } 417 }
418 418
419 /* 419 /*
420 * Tree-preemptible RCU implementation for rcu_read_unlock(). 420 * Tree-preemptible RCU implementation for rcu_read_unlock().
421 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 421 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
422 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 422 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
423 * invoke rcu_read_unlock_special() to clean up after a context switch 423 * invoke rcu_read_unlock_special() to clean up after a context switch
424 * in an RCU read-side critical section and other special cases. 424 * in an RCU read-side critical section and other special cases.
425 */ 425 */
426 void __rcu_read_unlock(void) 426 void __rcu_read_unlock(void)
427 { 427 {
428 struct task_struct *t = current; 428 struct task_struct *t = current;
429 429
430 if (t->rcu_read_lock_nesting != 1) 430 if (t->rcu_read_lock_nesting != 1)
431 --t->rcu_read_lock_nesting; 431 --t->rcu_read_lock_nesting;
432 else { 432 else {
433 barrier(); /* critical section before exit code. */ 433 barrier(); /* critical section before exit code. */
434 t->rcu_read_lock_nesting = INT_MIN; 434 t->rcu_read_lock_nesting = INT_MIN;
435 barrier(); /* assign before ->rcu_read_unlock_special load */ 435 barrier(); /* assign before ->rcu_read_unlock_special load */
436 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 436 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
437 rcu_read_unlock_special(t); 437 rcu_read_unlock_special(t);
438 barrier(); /* ->rcu_read_unlock_special load before assign */ 438 barrier(); /* ->rcu_read_unlock_special load before assign */
439 t->rcu_read_lock_nesting = 0; 439 t->rcu_read_lock_nesting = 0;
440 } 440 }
441 #ifdef CONFIG_PROVE_LOCKING 441 #ifdef CONFIG_PROVE_LOCKING
442 { 442 {
443 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); 443 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
444 444
445 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); 445 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
446 } 446 }
447 #endif /* #ifdef CONFIG_PROVE_LOCKING */ 447 #endif /* #ifdef CONFIG_PROVE_LOCKING */
448 } 448 }
449 EXPORT_SYMBOL_GPL(__rcu_read_unlock); 449 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
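For illustration (not part of this patch), a typical read-side usage sketch built on the two primitives above; struct foo, gp, and foo_read_a() are hypothetical names used only for this example:

#include <linux/rcupdate.h>

struct foo {
	int a;
};
static struct foo __rcu *gp;		/* hypothetical RCU-protected pointer */

static int foo_read_a(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock();		/* ends up in __rcu_read_lock() */
	p = rcu_dereference(gp);	/* fetch the protected pointer */
	if (p)
		a = p->a;		/* p cannot be freed until we unlock */
	rcu_read_unlock();		/* outermost unlock: __rcu_read_unlock()
					 * consults ->rcu_read_unlock_special */
	return a;
}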
450 450
451 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 451 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
452 452
453 /* 453 /*
454 * Dump detailed information for all tasks blocking the current RCU 454 * Dump detailed information for all tasks blocking the current RCU
455 * grace period on the specified rcu_node structure. 455 * grace period on the specified rcu_node structure.
456 */ 456 */
457 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 457 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
458 { 458 {
459 unsigned long flags; 459 unsigned long flags;
460 struct task_struct *t; 460 struct task_struct *t;
461 461
462 if (!rcu_preempt_blocked_readers_cgp(rnp)) 462 if (!rcu_preempt_blocked_readers_cgp(rnp))
463 return; 463 return;
464 raw_spin_lock_irqsave(&rnp->lock, flags); 464 raw_spin_lock_irqsave(&rnp->lock, flags);
465 t = list_entry(rnp->gp_tasks, 465 t = list_entry(rnp->gp_tasks,
466 struct task_struct, rcu_node_entry); 466 struct task_struct, rcu_node_entry);
467 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 467 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
468 sched_show_task(t); 468 sched_show_task(t);
469 raw_spin_unlock_irqrestore(&rnp->lock, flags); 469 raw_spin_unlock_irqrestore(&rnp->lock, flags);
470 } 470 }
471 471
472 /* 472 /*
473 * Dump detailed information for all tasks blocking the current RCU 473 * Dump detailed information for all tasks blocking the current RCU
474 * grace period. 474 * grace period.
475 */ 475 */
476 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 476 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
477 { 477 {
478 struct rcu_node *rnp = rcu_get_root(rsp); 478 struct rcu_node *rnp = rcu_get_root(rsp);
479 479
480 rcu_print_detail_task_stall_rnp(rnp); 480 rcu_print_detail_task_stall_rnp(rnp);
481 rcu_for_each_leaf_node(rsp, rnp) 481 rcu_for_each_leaf_node(rsp, rnp)
482 rcu_print_detail_task_stall_rnp(rnp); 482 rcu_print_detail_task_stall_rnp(rnp);
483 } 483 }
484 484
485 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 485 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
486 486
487 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 487 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
488 { 488 {
489 } 489 }
490 490
491 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 491 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
492 492
493 /* 493 /*
494 * Scan the current list of tasks blocked within RCU read-side critical 494 * Scan the current list of tasks blocked within RCU read-side critical
495 * sections, printing out the tid of each. 495 * sections, printing out the tid of each.
496 */ 496 */
497 static int rcu_print_task_stall(struct rcu_node *rnp) 497 static int rcu_print_task_stall(struct rcu_node *rnp)
498 { 498 {
499 struct task_struct *t; 499 struct task_struct *t;
500 int ndetected = 0; 500 int ndetected = 0;
501 501
502 if (!rcu_preempt_blocked_readers_cgp(rnp)) 502 if (!rcu_preempt_blocked_readers_cgp(rnp))
503 return 0; 503 return 0;
504 t = list_entry(rnp->gp_tasks, 504 t = list_entry(rnp->gp_tasks,
505 struct task_struct, rcu_node_entry); 505 struct task_struct, rcu_node_entry);
506 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 506 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
507 printk(" P%d", t->pid); 507 printk(" P%d", t->pid);
508 ndetected++; 508 ndetected++;
509 } 509 }
510 return ndetected; 510 return ndetected;
511 } 511 }
512 512
513 /* 513 /*
514 * Suppress preemptible RCU's CPU stall warnings by pushing the 514 * Suppress preemptible RCU's CPU stall warnings by pushing the
515 * time of the next stall-warning message comfortably far into the 515 * time of the next stall-warning message comfortably far into the
516 * future. 516 * future.
517 */ 517 */
518 static void rcu_preempt_stall_reset(void) 518 static void rcu_preempt_stall_reset(void)
519 { 519 {
520 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; 520 rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
521 } 521 }
522 522
523 /* 523 /*
524 * Check that the list of blocked tasks for the newly completed grace 524 * Check that the list of blocked tasks for the newly completed grace
525 * period is in fact empty. It is a serious bug to complete a grace 525 * period is in fact empty. It is a serious bug to complete a grace
526 * period that still has RCU readers blocked! This function must be 526 * period that still has RCU readers blocked! This function must be
527 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 527 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
528 * must be held by the caller. 528 * must be held by the caller.
529 * 529 *
530 * Also, if there are blocked tasks on the list, they automatically 530 * Also, if there are blocked tasks on the list, they automatically
531 * block the newly created grace period, so set up ->gp_tasks accordingly. 531 * block the newly created grace period, so set up ->gp_tasks accordingly.
532 */ 532 */
533 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 533 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
534 { 534 {
535 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 535 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
536 if (!list_empty(&rnp->blkd_tasks)) 536 if (!list_empty(&rnp->blkd_tasks))
537 rnp->gp_tasks = rnp->blkd_tasks.next; 537 rnp->gp_tasks = rnp->blkd_tasks.next;
538 WARN_ON_ONCE(rnp->qsmask); 538 WARN_ON_ONCE(rnp->qsmask);
539 } 539 }
540 540
541 #ifdef CONFIG_HOTPLUG_CPU 541 #ifdef CONFIG_HOTPLUG_CPU
542 542
543 /* 543 /*
544 * Handle tasklist migration for case in which all CPUs covered by the 544 * Handle tasklist migration for case in which all CPUs covered by the
545 * specified rcu_node have gone offline. Move them up to the root 545 * specified rcu_node have gone offline. Move them up to the root
546 * rcu_node. The reason for not just moving them to the immediate 546 * rcu_node. The reason for not just moving them to the immediate
547 * parent is to remove the need for rcu_read_unlock_special() to 547 * parent is to remove the need for rcu_read_unlock_special() to
548 * make more than two attempts to acquire the target rcu_node's lock. 548 * make more than two attempts to acquire the target rcu_node's lock.
549 * Returns non-zero if there were tasks blocking the current normal 549 * Returns non-zero if there were tasks blocking the current normal
550 * and/or expedited RCU grace period on the specified rcu_node 550 * and/or expedited RCU grace period on the specified rcu_node
551 * structure: the return value has RCU_OFL_TASKS_NORM_GP and/or 551 * structure: the return value has RCU_OFL_TASKS_NORM_GP and/or
552 * RCU_OFL_TASKS_EXP_GP set accordingly, and is zero if no tasks 552 * RCU_OFL_TASKS_EXP_GP set accordingly, and is zero if no tasks
553 * were blocking either grace period. 553 * were blocking either grace period.
554 * 554 *
555 * The caller must hold rnp->lock with irqs disabled. 555 * The caller must hold rnp->lock with irqs disabled.
556 */ 556 */
557 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 557 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
558 struct rcu_node *rnp, 558 struct rcu_node *rnp,
559 struct rcu_data *rdp) 559 struct rcu_data *rdp)
560 { 560 {
561 struct list_head *lp; 561 struct list_head *lp;
562 struct list_head *lp_root; 562 struct list_head *lp_root;
563 int retval = 0; 563 int retval = 0;
564 struct rcu_node *rnp_root = rcu_get_root(rsp); 564 struct rcu_node *rnp_root = rcu_get_root(rsp);
565 struct task_struct *t; 565 struct task_struct *t;
566 566
567 if (rnp == rnp_root) { 567 if (rnp == rnp_root) {
568 WARN_ONCE(1, "Last CPU thought to be offlined?"); 568 WARN_ONCE(1, "Last CPU thought to be offlined?");
569 return 0; /* Shouldn't happen: at least one CPU online. */ 569 return 0; /* Shouldn't happen: at least one CPU online. */
570 } 570 }
571 571
572 /* If we are on an internal node, complain bitterly. */ 572 /* If we are on an internal node, complain bitterly. */
573 WARN_ON_ONCE(rnp != rdp->mynode); 573 WARN_ON_ONCE(rnp != rdp->mynode);
574 574
575 /* 575 /*
576 * Move tasks up to root rcu_node. Don't try to get fancy for 576 * Move tasks up to root rcu_node. Don't try to get fancy for
577 * this corner-case operation -- just put this node's tasks 577 * this corner-case operation -- just put this node's tasks
578 * at the head of the root node's list, and update the root node's 578 * at the head of the root node's list, and update the root node's
579 * ->gp_tasks and ->exp_tasks pointers to those of this node's, 579 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
580 * if non-NULL. This might result in waiting for more tasks than 580 * if non-NULL. This might result in waiting for more tasks than
581 * absolutely necessary, but this is a good performance/complexity 581 * absolutely necessary, but this is a good performance/complexity
582 * tradeoff. 582 * tradeoff.
583 */ 583 */
584 if (rcu_preempt_blocked_readers_cgp(rnp)) 584 if (rcu_preempt_blocked_readers_cgp(rnp))
585 retval |= RCU_OFL_TASKS_NORM_GP; 585 retval |= RCU_OFL_TASKS_NORM_GP;
586 if (rcu_preempted_readers_exp(rnp)) 586 if (rcu_preempted_readers_exp(rnp))
587 retval |= RCU_OFL_TASKS_EXP_GP; 587 retval |= RCU_OFL_TASKS_EXP_GP;
588 lp = &rnp->blkd_tasks; 588 lp = &rnp->blkd_tasks;
589 lp_root = &rnp_root->blkd_tasks; 589 lp_root = &rnp_root->blkd_tasks;
590 while (!list_empty(lp)) { 590 while (!list_empty(lp)) {
591 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 591 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
592 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 592 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
593 list_del(&t->rcu_node_entry); 593 list_del(&t->rcu_node_entry);
594 t->rcu_blocked_node = rnp_root; 594 t->rcu_blocked_node = rnp_root;
595 list_add(&t->rcu_node_entry, lp_root); 595 list_add(&t->rcu_node_entry, lp_root);
596 if (&t->rcu_node_entry == rnp->gp_tasks) 596 if (&t->rcu_node_entry == rnp->gp_tasks)
597 rnp_root->gp_tasks = rnp->gp_tasks; 597 rnp_root->gp_tasks = rnp->gp_tasks;
598 if (&t->rcu_node_entry == rnp->exp_tasks) 598 if (&t->rcu_node_entry == rnp->exp_tasks)
599 rnp_root->exp_tasks = rnp->exp_tasks; 599 rnp_root->exp_tasks = rnp->exp_tasks;
600 #ifdef CONFIG_RCU_BOOST 600 #ifdef CONFIG_RCU_BOOST
601 if (&t->rcu_node_entry == rnp->boost_tasks) 601 if (&t->rcu_node_entry == rnp->boost_tasks)
602 rnp_root->boost_tasks = rnp->boost_tasks; 602 rnp_root->boost_tasks = rnp->boost_tasks;
603 #endif /* #ifdef CONFIG_RCU_BOOST */ 603 #endif /* #ifdef CONFIG_RCU_BOOST */
604 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 604 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
605 } 605 }
606 606
607 #ifdef CONFIG_RCU_BOOST 607 #ifdef CONFIG_RCU_BOOST
608 /* In case root is being boosted and leaf is not. */ 608 /* In case root is being boosted and leaf is not. */
609 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 609 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
610 if (rnp_root->boost_tasks != NULL && 610 if (rnp_root->boost_tasks != NULL &&
611 rnp_root->boost_tasks != rnp_root->gp_tasks) 611 rnp_root->boost_tasks != rnp_root->gp_tasks)
612 rnp_root->boost_tasks = rnp_root->gp_tasks; 612 rnp_root->boost_tasks = rnp_root->gp_tasks;
613 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 613 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
614 #endif /* #ifdef CONFIG_RCU_BOOST */ 614 #endif /* #ifdef CONFIG_RCU_BOOST */
615 615
616 rnp->gp_tasks = NULL; 616 rnp->gp_tasks = NULL;
617 rnp->exp_tasks = NULL; 617 rnp->exp_tasks = NULL;
618 return retval; 618 return retval;
619 } 619 }
620 620
621 /* 621 /*
622 * Do CPU-offline processing for preemptible RCU. 622 * Do CPU-offline processing for preemptible RCU.
623 */ 623 */
624 static void rcu_preempt_offline_cpu(int cpu) 624 static void rcu_preempt_offline_cpu(int cpu)
625 { 625 {
626 __rcu_offline_cpu(cpu, &rcu_preempt_state); 626 __rcu_offline_cpu(cpu, &rcu_preempt_state);
627 } 627 }
628 628
629 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 629 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
630 630
631 /* 631 /*
632 * Check for a quiescent state from the current CPU. When a task blocks, 632 * Check for a quiescent state from the current CPU. When a task blocks,
633 * the task is recorded in the corresponding CPU's rcu_node structure, 633 * the task is recorded in the corresponding CPU's rcu_node structure,
634 * which is checked elsewhere. 634 * which is checked elsewhere.
635 * 635 *
636 * Caller must disable hard irqs. 636 * Caller must disable hard irqs.
637 */ 637 */
638 static void rcu_preempt_check_callbacks(int cpu) 638 static void rcu_preempt_check_callbacks(int cpu)
639 { 639 {
640 struct task_struct *t = current; 640 struct task_struct *t = current;
641 641
642 if (t->rcu_read_lock_nesting == 0) { 642 if (t->rcu_read_lock_nesting == 0) {
643 rcu_preempt_qs(cpu); 643 rcu_preempt_qs(cpu);
644 return; 644 return;
645 } 645 }
646 if (t->rcu_read_lock_nesting > 0 && 646 if (t->rcu_read_lock_nesting > 0 &&
647 per_cpu(rcu_preempt_data, cpu).qs_pending) 647 per_cpu(rcu_preempt_data, cpu).qs_pending)
648 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 648 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
649 } 649 }
650 650
651 /* 651 /*
652 * Process callbacks for preemptible RCU. 652 * Process callbacks for preemptible RCU.
653 */ 653 */
654 static void rcu_preempt_process_callbacks(void) 654 static void rcu_preempt_process_callbacks(void)
655 { 655 {
656 __rcu_process_callbacks(&rcu_preempt_state, 656 __rcu_process_callbacks(&rcu_preempt_state,
657 &__get_cpu_var(rcu_preempt_data)); 657 &__get_cpu_var(rcu_preempt_data));
658 } 658 }
659 659
660 #ifdef CONFIG_RCU_BOOST 660 #ifdef CONFIG_RCU_BOOST
661 661
662 static void rcu_preempt_do_callbacks(void) 662 static void rcu_preempt_do_callbacks(void)
663 { 663 {
664 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); 664 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
665 } 665 }
666 666
667 #endif /* #ifdef CONFIG_RCU_BOOST */ 667 #endif /* #ifdef CONFIG_RCU_BOOST */
668 668
669 /* 669 /*
670 * Queue a preemptible-RCU callback for invocation after a grace period. 670 * Queue a preemptible-RCU callback for invocation after a grace period.
671 */ 671 */
672 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 672 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
673 { 673 {
674 __call_rcu(head, func, &rcu_preempt_state); 674 __call_rcu(head, func, &rcu_preempt_state);
675 } 675 }
676 EXPORT_SYMBOL_GPL(call_rcu); 676 EXPORT_SYMBOL_GPL(call_rcu);
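For illustration (not part of this patch), a common asynchronous-reclaim sketch using call_rcu(); struct foo and foo_reclaim() are hypothetical, and old_fp is assumed to have already been unpublished (for example via rcu_assign_pointer()):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct rcu_head rh;
	int a;
};

static void foo_reclaim(struct rcu_head *rcu)
{
	struct foo *fp = container_of(rcu, struct foo, rh);

	kfree(fp);			/* runs only after a grace period */
}

static void foo_retire(struct foo *old_fp)
{
	/* old_fp is already unreachable to new readers at this point. */
	call_rcu(&old_fp->rh, foo_reclaim);
}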
677 677
678 /** 678 /**
679 * synchronize_rcu - wait until a grace period has elapsed. 679 * synchronize_rcu - wait until a grace period has elapsed.
680 * 680 *
681 * Control will return to the caller some time after a full grace 681 * Control will return to the caller some time after a full grace
682 * period has elapsed, in other words after all currently executing RCU 682 * period has elapsed, in other words after all currently executing RCU
683 * read-side critical sections have completed. Note, however, that 683 * read-side critical sections have completed. Note, however, that
684 * upon return from synchronize_rcu(), the caller might well be executing 684 * upon return from synchronize_rcu(), the caller might well be executing
685 * concurrently with new RCU read-side critical sections that began while 685 * concurrently with new RCU read-side critical sections that began while
686 * synchronize_rcu() was waiting. RCU read-side critical sections are 686 * synchronize_rcu() was waiting. RCU read-side critical sections are
687 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 687 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
688 */ 688 */
689 void synchronize_rcu(void) 689 void synchronize_rcu(void)
690 { 690 {
691 if (!rcu_scheduler_active) 691 if (!rcu_scheduler_active)
692 return; 692 return;
693 wait_rcu_gp(call_rcu); 693 wait_rcu_gp(call_rcu);
694 } 694 }
695 EXPORT_SYMBOL_GPL(synchronize_rcu); 695 EXPORT_SYMBOL_GPL(synchronize_rcu);
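For illustration (not part of this patch), the usual synchronous update pattern built on synchronize_rcu(), reusing the hypothetical struct foo and gp from the reader sketch above; foo_lock and foo_update() are likewise hypothetical, with foo_lock assumed to serialize updaters:

static DEFINE_SPINLOCK(foo_lock);	/* hypothetical update-side lock */

static void foo_update(struct foo *new_fp)
{
	struct foo *old_fp;

	spin_lock(&foo_lock);
	old_fp = rcu_dereference_protected(gp, lockdep_is_held(&foo_lock));
	rcu_assign_pointer(gp, new_fp);	/* publish the new version */
	spin_unlock(&foo_lock);

	synchronize_rcu();		/* wait for pre-existing readers */
	kfree(old_fp);			/* no reader can still hold old_fp */
}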
696 696
697 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 697 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
698 static long sync_rcu_preempt_exp_count; 698 static long sync_rcu_preempt_exp_count;
699 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 699 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
700 700
701 /* 701 /*
702 * Return non-zero if there are any tasks in RCU read-side critical 702 * Return non-zero if there are any tasks in RCU read-side critical
703 * sections blocking the current preemptible-RCU expedited grace period. 703 * sections blocking the current preemptible-RCU expedited grace period.
704 * If there is no preemptible-RCU expedited grace period currently in 704 * If there is no preemptible-RCU expedited grace period currently in
705 * progress, returns zero unconditionally. 705 * progress, returns zero unconditionally.
706 */ 706 */
707 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 707 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
708 { 708 {
709 return rnp->exp_tasks != NULL; 709 return rnp->exp_tasks != NULL;
710 } 710 }
711 711
712 /* 712 /*
713 * Return non-zero if there is no RCU expedited grace period in progress 713 * Return non-zero if there is no RCU expedited grace period in progress
714 * for the specified rcu_node structure, in other words, if all CPUs and 714 * for the specified rcu_node structure, in other words, if all CPUs and
715 * tasks covered by the specified rcu_node structure have done their bit 715 * tasks covered by the specified rcu_node structure have done their bit
716 * for the current expedited grace period. Works only for preemptible 716 * for the current expedited grace period. Works only for preemptible
717 * RCU -- other RCU implementations use other means. 717 * RCU -- other RCU implementations use other means.
718 * 718 *
719 * Caller must hold sync_rcu_preempt_exp_mutex. 719 * Caller must hold sync_rcu_preempt_exp_mutex.
720 */ 720 */
721 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 721 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
722 { 722 {
723 return !rcu_preempted_readers_exp(rnp) && 723 return !rcu_preempted_readers_exp(rnp) &&
724 ACCESS_ONCE(rnp->expmask) == 0; 724 ACCESS_ONCE(rnp->expmask) == 0;
725 } 725 }
726 726
727 /* 727 /*
728 * Report the exit from RCU read-side critical section for the last task 728 * Report the exit from RCU read-side critical section for the last task
729 * that queued itself during or before the current expedited preemptible-RCU 729 * that queued itself during or before the current expedited preemptible-RCU
730 * grace period. This event is reported either to the rcu_node structure on 730 * grace period. This event is reported either to the rcu_node structure on
731 * which the task was queued or to one of that rcu_node structure's ancestors, 731 * which the task was queued or to one of that rcu_node structure's ancestors,
732 * recursively up the tree. (Calm down, calm down, we do the recursion 732 * recursively up the tree. (Calm down, calm down, we do the recursion
733 * iteratively!) 733 * iteratively!)
734 * 734 *
735 * Most callers will set the "wake" flag, but the task initiating the 735 * Most callers will set the "wake" flag, but the task initiating the
736 * expedited grace period need not wake itself. 736 * expedited grace period need not wake itself.
737 * 737 *
738 * Caller must hold sync_rcu_preempt_exp_mutex. 738 * Caller must hold sync_rcu_preempt_exp_mutex.
739 */ 739 */
740 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 740 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
741 bool wake) 741 bool wake)
742 { 742 {
743 unsigned long flags; 743 unsigned long flags;
744 unsigned long mask; 744 unsigned long mask;
745 745
746 raw_spin_lock_irqsave(&rnp->lock, flags); 746 raw_spin_lock_irqsave(&rnp->lock, flags);
747 for (;;) { 747 for (;;) {
748 if (!sync_rcu_preempt_exp_done(rnp)) { 748 if (!sync_rcu_preempt_exp_done(rnp)) {
749 raw_spin_unlock_irqrestore(&rnp->lock, flags); 749 raw_spin_unlock_irqrestore(&rnp->lock, flags);
750 break; 750 break;
751 } 751 }
752 if (rnp->parent == NULL) { 752 if (rnp->parent == NULL) {
753 raw_spin_unlock_irqrestore(&rnp->lock, flags); 753 raw_spin_unlock_irqrestore(&rnp->lock, flags);
754 if (wake) 754 if (wake)
755 wake_up(&sync_rcu_preempt_exp_wq); 755 wake_up(&sync_rcu_preempt_exp_wq);
756 break; 756 break;
757 } 757 }
758 mask = rnp->grpmask; 758 mask = rnp->grpmask;
759 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 759 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
760 rnp = rnp->parent; 760 rnp = rnp->parent;
761 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 761 raw_spin_lock(&rnp->lock); /* irqs already disabled */
762 rnp->expmask &= ~mask; 762 rnp->expmask &= ~mask;
763 } 763 }
764 } 764 }
765 765
766 /* 766 /*
767 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 767 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
768 * grace period for the specified rcu_node structure. If there are no such 768 * grace period for the specified rcu_node structure. If there are no such
769 * tasks, report it up the rcu_node hierarchy. 769 * tasks, report it up the rcu_node hierarchy.
770 * 770 *
771 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. 771 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
772 */ 772 */
773 static void 773 static void
774 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 774 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
775 { 775 {
776 unsigned long flags; 776 unsigned long flags;
777 int must_wait = 0; 777 int must_wait = 0;
778 778
779 raw_spin_lock_irqsave(&rnp->lock, flags); 779 raw_spin_lock_irqsave(&rnp->lock, flags);
780 if (list_empty(&rnp->blkd_tasks)) 780 if (list_empty(&rnp->blkd_tasks))
781 raw_spin_unlock_irqrestore(&rnp->lock, flags); 781 raw_spin_unlock_irqrestore(&rnp->lock, flags);
782 else { 782 else {
783 rnp->exp_tasks = rnp->blkd_tasks.next; 783 rnp->exp_tasks = rnp->blkd_tasks.next;
784 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 784 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
785 must_wait = 1; 785 must_wait = 1;
786 } 786 }
787 if (!must_wait) 787 if (!must_wait)
788 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ 788 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
789 } 789 }
790 790
791 /* 791 /*
792 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 792 * Wait for an rcu-preempt grace period, but expedite it. The basic idea
793 * is to invoke synchronize_sched_expedited() to push all the tasks to 793 * is to invoke synchronize_sched_expedited() to push all the tasks to
794 * the ->blkd_tasks lists and wait for these lists to drain. 794 * the ->blkd_tasks lists and wait for these lists to drain.
795 */ 795 */
796 void synchronize_rcu_expedited(void) 796 void synchronize_rcu_expedited(void)
797 { 797 {
798 unsigned long flags; 798 unsigned long flags;
799 struct rcu_node *rnp; 799 struct rcu_node *rnp;
800 struct rcu_state *rsp = &rcu_preempt_state; 800 struct rcu_state *rsp = &rcu_preempt_state;
801 long snap; 801 long snap;
802 int trycount = 0; 802 int trycount = 0;
803 803
804 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 804 smp_mb(); /* Caller's modifications seen first by other CPUs. */
805 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 805 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
806 smp_mb(); /* Above access cannot bleed into critical section. */ 806 smp_mb(); /* Above access cannot bleed into critical section. */
807 807
808 /* 808 /*
809 * Acquire lock, falling back to synchronize_rcu() if too many 809 * Acquire lock, falling back to synchronize_rcu() if too many
810 * lock-acquisition failures. Of course, if someone does the 810 * lock-acquisition failures. Of course, if someone does the
811 * expedited grace period for us, just leave. 811 * expedited grace period for us, just leave.
812 */ 812 */
813 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 813 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
814 if (trycount++ < 10) 814 if (trycount++ < 10)
815 udelay(trycount * num_online_cpus()); 815 udelay(trycount * num_online_cpus());
816 else { 816 else {
817 synchronize_rcu(); 817 synchronize_rcu();
818 return; 818 return;
819 } 819 }
820 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 820 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
821 goto mb_ret; /* Others did our work for us. */ 821 goto mb_ret; /* Others did our work for us. */
822 } 822 }
823 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) 823 if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
824 goto unlock_mb_ret; /* Others did our work for us. */ 824 goto unlock_mb_ret; /* Others did our work for us. */
825 825
826 /* force all RCU readers onto ->blkd_tasks lists. */ 826 /* force all RCU readers onto ->blkd_tasks lists. */
827 synchronize_sched_expedited(); 827 synchronize_sched_expedited();
828 828
829 raw_spin_lock_irqsave(&rsp->onofflock, flags); 829 raw_spin_lock_irqsave(&rsp->onofflock, flags);
830 830
831 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 831 /* Initialize ->expmask for all non-leaf rcu_node structures. */
832 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 832 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
833 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 833 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
834 rnp->expmask = rnp->qsmaskinit; 834 rnp->expmask = rnp->qsmaskinit;
835 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 835 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
836 } 836 }
837 837
838 /* Snapshot current state of ->blkd_tasks lists. */ 838 /* Snapshot current state of ->blkd_tasks lists. */
839 rcu_for_each_leaf_node(rsp, rnp) 839 rcu_for_each_leaf_node(rsp, rnp)
840 sync_rcu_preempt_exp_init(rsp, rnp); 840 sync_rcu_preempt_exp_init(rsp, rnp);
841 if (NUM_RCU_NODES > 1) 841 if (NUM_RCU_NODES > 1)
842 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 842 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
843 843
844 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 844 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
845 845
846 /* Wait for snapshotted ->blkd_tasks lists to drain. */ 846 /* Wait for snapshotted ->blkd_tasks lists to drain. */
847 rnp = rcu_get_root(rsp); 847 rnp = rcu_get_root(rsp);
848 wait_event(sync_rcu_preempt_exp_wq, 848 wait_event(sync_rcu_preempt_exp_wq,
849 sync_rcu_preempt_exp_done(rnp)); 849 sync_rcu_preempt_exp_done(rnp));
850 850
851 /* Clean up and exit. */ 851 /* Clean up and exit. */
852 smp_mb(); /* ensure expedited GP seen before counter increment. */ 852 smp_mb(); /* ensure expedited GP seen before counter increment. */
853 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 853 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
854 unlock_mb_ret: 854 unlock_mb_ret:
855 mutex_unlock(&sync_rcu_preempt_exp_mutex); 855 mutex_unlock(&sync_rcu_preempt_exp_mutex);
856 mb_ret: 856 mb_ret:
857 smp_mb(); /* ensure subsequent action seen after grace period. */ 857 smp_mb(); /* ensure subsequent action seen after grace period. */
858 } 858 }
859 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 859 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
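For reference, a minimal update-side sketch of how a caller might use synchronize_rcu_expedited(); the gadget structure, global_gadget pointer, gadget_lock, and free path are illustrative assumptions for this note, not part of this commit:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct gadget {
		int value;
		/* ... */
	};

	static struct gadget __rcu *global_gadget;	/* hypothetical RCU-protected pointer */
	static DEFINE_SPINLOCK(gadget_lock);		/* hypothetical update-side lock */

	/* Replace the old gadget and wait (expedited) before freeing it. */
	static void gadget_replace(struct gadget *newg)
	{
		struct gadget *oldg;

		spin_lock(&gadget_lock);
		oldg = rcu_dereference_protected(global_gadget,
						 lockdep_is_held(&gadget_lock));
		rcu_assign_pointer(global_gadget, newg);
		spin_unlock(&gadget_lock);

		synchronize_rcu_expedited();	/* all readers of oldg are done after this */
		kfree(oldg);
	}

The expedited primitive trades extra CPU work (synchronize_sched_expedited() plus the ->blkd_tasks drain above) for lower grace-period latency, so a sketch like this is appropriate only on genuinely latency-sensitive update paths.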
860 860
861 /* 861 /*
862 * Check to see if there is any immediate preemptible-RCU-related work 862 * Check to see if there is any immediate preemptible-RCU-related work
863 * to be done. 863 * to be done.
864 */ 864 */
865 static int rcu_preempt_pending(int cpu) 865 static int rcu_preempt_pending(int cpu)
866 { 866 {
867 return __rcu_pending(&rcu_preempt_state, 867 return __rcu_pending(&rcu_preempt_state,
868 &per_cpu(rcu_preempt_data, cpu)); 868 &per_cpu(rcu_preempt_data, cpu));
869 } 869 }
870 870
871 /* 871 /*
872 * Does preemptible RCU need the CPU to stay out of dynticks mode? 872 * Does preemptible RCU need the CPU to stay out of dynticks mode?
873 */ 873 */
874 static int rcu_preempt_needs_cpu(int cpu) 874 static int rcu_preempt_needs_cpu(int cpu)
875 { 875 {
876 return !!per_cpu(rcu_preempt_data, cpu).nxtlist; 876 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
877 } 877 }
878 878
879 /** 879 /**
880 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 880 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
881 */ 881 */
882 void rcu_barrier(void) 882 void rcu_barrier(void)
883 { 883 {
884 _rcu_barrier(&rcu_preempt_state, call_rcu); 884 _rcu_barrier(&rcu_preempt_state, call_rcu);
885 } 885 }
886 EXPORT_SYMBOL_GPL(rcu_barrier); 886 EXPORT_SYMBOL_GPL(rcu_barrier);
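As a reminder of why rcu_barrier() matters, here is a hedged sketch of the usual module-unload pattern it supports; struct foo, foo_free_rcu(), and foo_exit() are illustrative names, not anything defined by this commit:

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct rcu_head rcu;
		/* ... payload ... */
	};

	static void foo_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct foo, rcu));
	}

	static void foo_delete(struct foo *fp)
	{
		/* Unlink fp from its data structure first (not shown). */
		call_rcu(&fp->rcu, foo_free_rcu);
	}

	static void __exit foo_exit(void)
	{
		/*
		 * Wait for every callback queued by foo_delete() to run
		 * before the module text containing foo_free_rcu() goes away.
		 */
		rcu_barrier();
	}
	module_exit(foo_exit);

Without the rcu_barrier() call, an in-flight callback could still be invoked after the module is unloaded, dereferencing freed module text.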
887 887
888 /* 888 /*
889 * Initialize preemptible RCU's per-CPU data. 889 * Initialize preemptible RCU's per-CPU data.
890 */ 890 */
891 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 891 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
892 { 892 {
893 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); 893 rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
894 } 894 }
895 895
896 /* 896 /*
 897 * Move preemptible RCU's callbacks from a dying CPU to another online CPU. 897 * Move preemptible RCU's callbacks from a dying CPU to another online CPU.
898 */ 898 */
899 static void rcu_preempt_send_cbs_to_online(void) 899 static void rcu_preempt_send_cbs_to_online(void)
900 { 900 {
901 rcu_send_cbs_to_online(&rcu_preempt_state); 901 rcu_send_cbs_to_online(&rcu_preempt_state);
902 } 902 }
903 903
904 /* 904 /*
905 * Initialize preemptible RCU's state structures. 905 * Initialize preemptible RCU's state structures.
906 */ 906 */
907 static void __init __rcu_init_preempt(void) 907 static void __init __rcu_init_preempt(void)
908 { 908 {
909 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 909 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
910 } 910 }
911 911
912 /* 912 /*
913 * Check for a task exiting while in a preemptible-RCU read-side 913 * Check for a task exiting while in a preemptible-RCU read-side
 914 * critical section; clean up if so. No need to issue warnings, 914 * critical section; clean up if so. No need to issue warnings,
915 * as debug_check_no_locks_held() already does this if lockdep 915 * as debug_check_no_locks_held() already does this if lockdep
916 * is enabled. 916 * is enabled.
917 */ 917 */
918 void exit_rcu(void) 918 void exit_rcu(void)
919 { 919 {
920 struct task_struct *t = current; 920 struct task_struct *t = current;
921 921
922 if (t->rcu_read_lock_nesting == 0) 922 if (t->rcu_read_lock_nesting == 0)
923 return; 923 return;
924 t->rcu_read_lock_nesting = 1; 924 t->rcu_read_lock_nesting = 1;
925 __rcu_read_unlock(); 925 __rcu_read_unlock();
926 } 926 }
927 927
928 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 928 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
929 929
930 static struct rcu_state *rcu_state = &rcu_sched_state; 930 static struct rcu_state *rcu_state = &rcu_sched_state;
931 931
932 /* 932 /*
933 * Tell them what RCU they are running. 933 * Tell them what RCU they are running.
934 */ 934 */
935 static void __init rcu_bootup_announce(void) 935 static void __init rcu_bootup_announce(void)
936 { 936 {
937 printk(KERN_INFO "Hierarchical RCU implementation.\n"); 937 printk(KERN_INFO "Hierarchical RCU implementation.\n");
938 rcu_bootup_announce_oddness(); 938 rcu_bootup_announce_oddness();
939 } 939 }
940 940
941 /* 941 /*
942 * Return the number of RCU batches processed thus far for debug & stats. 942 * Return the number of RCU batches processed thus far for debug & stats.
943 */ 943 */
944 long rcu_batches_completed(void) 944 long rcu_batches_completed(void)
945 { 945 {
946 return rcu_batches_completed_sched(); 946 return rcu_batches_completed_sched();
947 } 947 }
948 EXPORT_SYMBOL_GPL(rcu_batches_completed); 948 EXPORT_SYMBOL_GPL(rcu_batches_completed);
949 949
950 /* 950 /*
951 * Force a quiescent state for RCU, which, because there is no preemptible 951 * Force a quiescent state for RCU, which, because there is no preemptible
952 * RCU, becomes the same as rcu-sched. 952 * RCU, becomes the same as rcu-sched.
953 */ 953 */
954 void rcu_force_quiescent_state(void) 954 void rcu_force_quiescent_state(void)
955 { 955 {
956 rcu_sched_force_quiescent_state(); 956 rcu_sched_force_quiescent_state();
957 } 957 }
958 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 958 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
959 959
960 /* 960 /*
961 * Because preemptible RCU does not exist, we never have to check for 961 * Because preemptible RCU does not exist, we never have to check for
962 * CPUs being in quiescent states. 962 * CPUs being in quiescent states.
963 */ 963 */
964 static void rcu_preempt_note_context_switch(int cpu) 964 static void rcu_preempt_note_context_switch(int cpu)
965 { 965 {
966 } 966 }
967 967
968 /* 968 /*
969 * Because preemptible RCU does not exist, there are never any preempted 969 * Because preemptible RCU does not exist, there are never any preempted
970 * RCU readers. 970 * RCU readers.
971 */ 971 */
972 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 972 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
973 { 973 {
974 return 0; 974 return 0;
975 } 975 }
976 976
977 #ifdef CONFIG_HOTPLUG_CPU 977 #ifdef CONFIG_HOTPLUG_CPU
978 978
979 /* Because preemptible RCU does not exist, no quieting of tasks. */ 979 /* Because preemptible RCU does not exist, no quieting of tasks. */
980 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 980 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
981 { 981 {
982 raw_spin_unlock_irqrestore(&rnp->lock, flags); 982 raw_spin_unlock_irqrestore(&rnp->lock, flags);
983 } 983 }
984 984
985 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 985 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
986 986
987 /* 987 /*
988 * Because preemptible RCU does not exist, we never have to check for 988 * Because preemptible RCU does not exist, we never have to check for
989 * tasks blocked within RCU read-side critical sections. 989 * tasks blocked within RCU read-side critical sections.
990 */ 990 */
991 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 991 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
992 { 992 {
993 } 993 }
994 994
995 /* 995 /*
996 * Because preemptible RCU does not exist, we never have to check for 996 * Because preemptible RCU does not exist, we never have to check for
997 * tasks blocked within RCU read-side critical sections. 997 * tasks blocked within RCU read-side critical sections.
998 */ 998 */
999 static int rcu_print_task_stall(struct rcu_node *rnp) 999 static int rcu_print_task_stall(struct rcu_node *rnp)
1000 { 1000 {
1001 return 0; 1001 return 0;
1002 } 1002 }
1003 1003
1004 /* 1004 /*
1005 * Because preemptible RCU does not exist, there is no need to suppress 1005 * Because preemptible RCU does not exist, there is no need to suppress
1006 * its CPU stall warnings. 1006 * its CPU stall warnings.
1007 */ 1007 */
1008 static void rcu_preempt_stall_reset(void) 1008 static void rcu_preempt_stall_reset(void)
1009 { 1009 {
1010 } 1010 }
1011 1011
1012 /* 1012 /*
1013 * Because there is no preemptible RCU, there can be no readers blocked, 1013 * Because there is no preemptible RCU, there can be no readers blocked,
1014 * so there is no need to check for blocked tasks. So check only for 1014 * so there is no need to check for blocked tasks. So check only for
1015 * bogus qsmask values. 1015 * bogus qsmask values.
1016 */ 1016 */
1017 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 1017 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
1018 { 1018 {
1019 WARN_ON_ONCE(rnp->qsmask); 1019 WARN_ON_ONCE(rnp->qsmask);
1020 } 1020 }
1021 1021
1022 #ifdef CONFIG_HOTPLUG_CPU 1022 #ifdef CONFIG_HOTPLUG_CPU
1023 1023
1024 /* 1024 /*
1025 * Because preemptible RCU does not exist, it never needs to migrate 1025 * Because preemptible RCU does not exist, it never needs to migrate
1026 * tasks that were blocked within RCU read-side critical sections, and 1026 * tasks that were blocked within RCU read-side critical sections, and
1027 * such non-existent tasks cannot possibly have been blocking the current 1027 * such non-existent tasks cannot possibly have been blocking the current
1028 * grace period. 1028 * grace period.
1029 */ 1029 */
1030 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 1030 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1031 struct rcu_node *rnp, 1031 struct rcu_node *rnp,
1032 struct rcu_data *rdp) 1032 struct rcu_data *rdp)
1033 { 1033 {
1034 return 0; 1034 return 0;
1035 } 1035 }
1036 1036
1037 /* 1037 /*
1038 * Because preemptible RCU does not exist, it never needs CPU-offline 1038 * Because preemptible RCU does not exist, it never needs CPU-offline
1039 * processing. 1039 * processing.
1040 */ 1040 */
1041 static void rcu_preempt_offline_cpu(int cpu) 1041 static void rcu_preempt_offline_cpu(int cpu)
1042 { 1042 {
1043 } 1043 }
1044 1044
1045 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1045 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1046 1046
1047 /* 1047 /*
1048 * Because preemptible RCU does not exist, it never has any callbacks 1048 * Because preemptible RCU does not exist, it never has any callbacks
1049 * to check. 1049 * to check.
1050 */ 1050 */
1051 static void rcu_preempt_check_callbacks(int cpu) 1051 static void rcu_preempt_check_callbacks(int cpu)
1052 { 1052 {
1053 } 1053 }
1054 1054
1055 /* 1055 /*
1056 * Because preemptible RCU does not exist, it never has any callbacks 1056 * Because preemptible RCU does not exist, it never has any callbacks
1057 * to process. 1057 * to process.
1058 */ 1058 */
1059 static void rcu_preempt_process_callbacks(void) 1059 static void rcu_preempt_process_callbacks(void)
1060 { 1060 {
1061 } 1061 }
1062 1062
1063 /* 1063 /*
1064 * Wait for an rcu-preempt grace period, but make it happen quickly. 1064 * Wait for an rcu-preempt grace period, but make it happen quickly.
1065 * But because preemptible RCU does not exist, map to rcu-sched. 1065 * But because preemptible RCU does not exist, map to rcu-sched.
1066 */ 1066 */
1067 void synchronize_rcu_expedited(void) 1067 void synchronize_rcu_expedited(void)
1068 { 1068 {
1069 synchronize_sched_expedited(); 1069 synchronize_sched_expedited();
1070 } 1070 }
1071 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 1071 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1072 1072
1073 #ifdef CONFIG_HOTPLUG_CPU 1073 #ifdef CONFIG_HOTPLUG_CPU
1074 1074
1075 /* 1075 /*
1076 * Because preemptible RCU does not exist, there is never any need to 1076 * Because preemptible RCU does not exist, there is never any need to
1077 * report on tasks preempted in RCU read-side critical sections during 1077 * report on tasks preempted in RCU read-side critical sections during
1078 * expedited RCU grace periods. 1078 * expedited RCU grace periods.
1079 */ 1079 */
1080 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 1080 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1081 bool wake) 1081 bool wake)
1082 { 1082 {
1083 } 1083 }
1084 1084
1085 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1085 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1086 1086
1087 /* 1087 /*
1088 * Because preemptible RCU does not exist, it never has any work to do. 1088 * Because preemptible RCU does not exist, it never has any work to do.
1089 */ 1089 */
1090 static int rcu_preempt_pending(int cpu) 1090 static int rcu_preempt_pending(int cpu)
1091 { 1091 {
1092 return 0; 1092 return 0;
1093 } 1093 }
1094 1094
1095 /* 1095 /*
1096 * Because preemptible RCU does not exist, it never needs any CPU. 1096 * Because preemptible RCU does not exist, it never needs any CPU.
1097 */ 1097 */
1098 static int rcu_preempt_needs_cpu(int cpu) 1098 static int rcu_preempt_needs_cpu(int cpu)
1099 { 1099 {
1100 return 0; 1100 return 0;
1101 } 1101 }
1102 1102
1103 /* 1103 /*
1104 * Because preemptible RCU does not exist, rcu_barrier() is just 1104 * Because preemptible RCU does not exist, rcu_barrier() is just
1105 * another name for rcu_barrier_sched(). 1105 * another name for rcu_barrier_sched().
1106 */ 1106 */
1107 void rcu_barrier(void) 1107 void rcu_barrier(void)
1108 { 1108 {
1109 rcu_barrier_sched(); 1109 rcu_barrier_sched();
1110 } 1110 }
1111 EXPORT_SYMBOL_GPL(rcu_barrier); 1111 EXPORT_SYMBOL_GPL(rcu_barrier);
1112 1112
1113 /* 1113 /*
1114 * Because preemptible RCU does not exist, there is no per-CPU 1114 * Because preemptible RCU does not exist, there is no per-CPU
1115 * data to initialize. 1115 * data to initialize.
1116 */ 1116 */
1117 static void __cpuinit rcu_preempt_init_percpu_data(int cpu) 1117 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1118 { 1118 {
1119 } 1119 }
1120 1120
1121 /* 1121 /*
1122 * Because there is no preemptible RCU, there are no callbacks to move. 1122 * Because there is no preemptible RCU, there are no callbacks to move.
1123 */ 1123 */
1124 static void rcu_preempt_send_cbs_to_online(void) 1124 static void rcu_preempt_send_cbs_to_online(void)
1125 { 1125 {
1126 } 1126 }
1127 1127
1128 /* 1128 /*
1129 * Because preemptible RCU does not exist, it need not be initialized. 1129 * Because preemptible RCU does not exist, it need not be initialized.
1130 */ 1130 */
1131 static void __init __rcu_init_preempt(void) 1131 static void __init __rcu_init_preempt(void)
1132 { 1132 {
1133 } 1133 }
1134 1134
1135 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1135 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1136 1136
1137 #ifdef CONFIG_RCU_BOOST 1137 #ifdef CONFIG_RCU_BOOST
1138 1138
1139 #include "rtmutex_common.h" 1139 #include "rtmutex_common.h"
1140 1140
1141 #ifdef CONFIG_RCU_TRACE 1141 #ifdef CONFIG_RCU_TRACE
1142 1142
1143 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1143 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1144 { 1144 {
1145 if (list_empty(&rnp->blkd_tasks)) 1145 if (list_empty(&rnp->blkd_tasks))
1146 rnp->n_balk_blkd_tasks++; 1146 rnp->n_balk_blkd_tasks++;
1147 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 1147 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1148 rnp->n_balk_exp_gp_tasks++; 1148 rnp->n_balk_exp_gp_tasks++;
1149 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) 1149 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1150 rnp->n_balk_boost_tasks++; 1150 rnp->n_balk_boost_tasks++;
1151 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) 1151 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1152 rnp->n_balk_notblocked++; 1152 rnp->n_balk_notblocked++;
1153 else if (rnp->gp_tasks != NULL && 1153 else if (rnp->gp_tasks != NULL &&
1154 ULONG_CMP_LT(jiffies, rnp->boost_time)) 1154 ULONG_CMP_LT(jiffies, rnp->boost_time))
1155 rnp->n_balk_notyet++; 1155 rnp->n_balk_notyet++;
1156 else 1156 else
1157 rnp->n_balk_nos++; 1157 rnp->n_balk_nos++;
1158 } 1158 }
1159 1159
1160 #else /* #ifdef CONFIG_RCU_TRACE */ 1160 #else /* #ifdef CONFIG_RCU_TRACE */
1161 1161
1162 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1162 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1163 { 1163 {
1164 } 1164 }
1165 1165
1166 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1166 #endif /* #else #ifdef CONFIG_RCU_TRACE */
1167 1167
1168 static struct lock_class_key rcu_boost_class; 1168 static struct lock_class_key rcu_boost_class;
1169 1169
1170 /* 1170 /*
1171 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1171 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1172 * or ->boost_tasks, advancing the pointer to the next task in the 1172 * or ->boost_tasks, advancing the pointer to the next task in the
1173 * ->blkd_tasks list. 1173 * ->blkd_tasks list.
1174 * 1174 *
1175 * Note that irqs must be enabled: boosting the task can block. 1175 * Note that irqs must be enabled: boosting the task can block.
1176 * Returns 1 if there are more tasks needing to be boosted. 1176 * Returns 1 if there are more tasks needing to be boosted.
1177 */ 1177 */
1178 static int rcu_boost(struct rcu_node *rnp) 1178 static int rcu_boost(struct rcu_node *rnp)
1179 { 1179 {
1180 unsigned long flags; 1180 unsigned long flags;
1181 struct rt_mutex mtx; 1181 struct rt_mutex mtx;
1182 struct task_struct *t; 1182 struct task_struct *t;
1183 struct list_head *tb; 1183 struct list_head *tb;
1184 1184
1185 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1185 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1186 return 0; /* Nothing left to boost. */ 1186 return 0; /* Nothing left to boost. */
1187 1187
1188 raw_spin_lock_irqsave(&rnp->lock, flags); 1188 raw_spin_lock_irqsave(&rnp->lock, flags);
1189 1189
1190 /* 1190 /*
1191 * Recheck under the lock: all tasks in need of boosting 1191 * Recheck under the lock: all tasks in need of boosting
1192 * might exit their RCU read-side critical sections on their own. 1192 * might exit their RCU read-side critical sections on their own.
1193 */ 1193 */
1194 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { 1194 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1195 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1195 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1196 return 0; 1196 return 0;
1197 } 1197 }
1198 1198
1199 /* 1199 /*
1200 * Preferentially boost tasks blocking expedited grace periods. 1200 * Preferentially boost tasks blocking expedited grace periods.
1201 * This cannot starve the normal grace periods because a second 1201 * This cannot starve the normal grace periods because a second
1202 * expedited grace period must boost all blocked tasks, including 1202 * expedited grace period must boost all blocked tasks, including
1203 * those blocking the pre-existing normal grace period. 1203 * those blocking the pre-existing normal grace period.
1204 */ 1204 */
1205 if (rnp->exp_tasks != NULL) { 1205 if (rnp->exp_tasks != NULL) {
1206 tb = rnp->exp_tasks; 1206 tb = rnp->exp_tasks;
1207 rnp->n_exp_boosts++; 1207 rnp->n_exp_boosts++;
1208 } else { 1208 } else {
1209 tb = rnp->boost_tasks; 1209 tb = rnp->boost_tasks;
1210 rnp->n_normal_boosts++; 1210 rnp->n_normal_boosts++;
1211 } 1211 }
1212 rnp->n_tasks_boosted++; 1212 rnp->n_tasks_boosted++;
1213 1213
1214 /* 1214 /*
1215 * We boost task t by manufacturing an rt_mutex that appears to 1215 * We boost task t by manufacturing an rt_mutex that appears to
1216 * be held by task t. We leave a pointer to that rt_mutex where 1216 * be held by task t. We leave a pointer to that rt_mutex where
1217 * task t can find it, and task t will release the mutex when it 1217 * task t can find it, and task t will release the mutex when it
1218 * exits its outermost RCU read-side critical section. Then 1218 * exits its outermost RCU read-side critical section. Then
1219 * simply acquiring this artificial rt_mutex will boost task 1219 * simply acquiring this artificial rt_mutex will boost task
1220 * t's priority. (Thanks to tglx for suggesting this approach!) 1220 * t's priority. (Thanks to tglx for suggesting this approach!)
1221 * 1221 *
1222 * Note that task t must acquire rnp->lock to remove itself from 1222 * Note that task t must acquire rnp->lock to remove itself from
1223 * the ->blkd_tasks list, which it will do from exit() if from 1223 * the ->blkd_tasks list, which it will do from exit() if from
1224 * nowhere else. We therefore are guaranteed that task t will 1224 * nowhere else. We therefore are guaranteed that task t will
1225 * stay around at least until we drop rnp->lock. Note that 1225 * stay around at least until we drop rnp->lock. Note that
1226 * rnp->lock also resolves races between our priority boosting 1226 * rnp->lock also resolves races between our priority boosting
1227 * and task t's exiting its outermost RCU read-side critical 1227 * and task t's exiting its outermost RCU read-side critical
1228 * section. 1228 * section.
1229 */ 1229 */
1230 t = container_of(tb, struct task_struct, rcu_node_entry); 1230 t = container_of(tb, struct task_struct, rcu_node_entry);
1231 rt_mutex_init_proxy_locked(&mtx, t); 1231 rt_mutex_init_proxy_locked(&mtx, t);
1232 /* Avoid lockdep false positives. This rt_mutex is its own thing. */ 1232 /* Avoid lockdep false positives. This rt_mutex is its own thing. */
1233 lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, 1233 lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
1234 "rcu_boost_mutex"); 1234 "rcu_boost_mutex");
1235 t->rcu_boost_mutex = &mtx; 1235 t->rcu_boost_mutex = &mtx;
1236 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1236 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1237 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1237 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1238 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1238 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1239 1239
1240 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; 1240 return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
1241 } 1241 }
1242 1242
1243 /* 1243 /*
1244 * Timer handler to initiate waking up of boost kthreads that 1244 * Timer handler to initiate waking up of boost kthreads that
1245 * have yielded the CPU due to excessive numbers of tasks to 1245 * have yielded the CPU due to excessive numbers of tasks to
1246 * boost. We wake up the per-rcu_node kthread, which in turn 1246 * boost. We wake up the per-rcu_node kthread, which in turn
1247 * will wake up the booster kthread. 1247 * will wake up the booster kthread.
1248 */ 1248 */
1249 static void rcu_boost_kthread_timer(unsigned long arg) 1249 static void rcu_boost_kthread_timer(unsigned long arg)
1250 { 1250 {
1251 invoke_rcu_node_kthread((struct rcu_node *)arg); 1251 invoke_rcu_node_kthread((struct rcu_node *)arg);
1252 } 1252 }
1253 1253
1254 /* 1254 /*
1255 * Priority-boosting kthread. One per leaf rcu_node and one for the 1255 * Priority-boosting kthread. One per leaf rcu_node and one for the
1256 * root rcu_node. 1256 * root rcu_node.
1257 */ 1257 */
1258 static int rcu_boost_kthread(void *arg) 1258 static int rcu_boost_kthread(void *arg)
1259 { 1259 {
1260 struct rcu_node *rnp = (struct rcu_node *)arg; 1260 struct rcu_node *rnp = (struct rcu_node *)arg;
1261 int spincnt = 0; 1261 int spincnt = 0;
1262 int more2boost; 1262 int more2boost;
1263 1263
1264 trace_rcu_utilization("Start boost kthread@init"); 1264 trace_rcu_utilization("Start boost kthread@init");
1265 for (;;) { 1265 for (;;) {
1266 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1266 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1267 trace_rcu_utilization("End boost kthread@rcu_wait"); 1267 trace_rcu_utilization("End boost kthread@rcu_wait");
1268 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1268 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1269 trace_rcu_utilization("Start boost kthread@rcu_wait"); 1269 trace_rcu_utilization("Start boost kthread@rcu_wait");
1270 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1270 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1271 more2boost = rcu_boost(rnp); 1271 more2boost = rcu_boost(rnp);
1272 if (more2boost) 1272 if (more2boost)
1273 spincnt++; 1273 spincnt++;
1274 else 1274 else
1275 spincnt = 0; 1275 spincnt = 0;
1276 if (spincnt > 10) { 1276 if (spincnt > 10) {
1277 trace_rcu_utilization("End boost kthread@rcu_yield"); 1277 trace_rcu_utilization("End boost kthread@rcu_yield");
1278 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1278 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1279 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1279 trace_rcu_utilization("Start boost kthread@rcu_yield");
1280 spincnt = 0; 1280 spincnt = 0;
1281 } 1281 }
1282 } 1282 }
1283 /* NOTREACHED */ 1283 /* NOTREACHED */
1284 trace_rcu_utilization("End boost kthread@notreached"); 1284 trace_rcu_utilization("End boost kthread@notreached");
1285 return 0; 1285 return 0;
1286 } 1286 }
1287 1287
1288 /* 1288 /*
1289 * Check to see if it is time to start boosting RCU readers that are 1289 * Check to see if it is time to start boosting RCU readers that are
1290 * blocking the current grace period, and, if so, tell the per-rcu_node 1290 * blocking the current grace period, and, if so, tell the per-rcu_node
1291 * kthread to start boosting them. If there is an expedited grace 1291 * kthread to start boosting them. If there is an expedited grace
1292 * period in progress, it is always time to boost. 1292 * period in progress, it is always time to boost.
1293 * 1293 *
1294 * The caller must hold rnp->lock, which this function releases, 1294 * The caller must hold rnp->lock, which this function releases,
1295 * but irqs remain disabled. The ->boost_kthread_task is immortal, 1295 * but irqs remain disabled. The ->boost_kthread_task is immortal,
1296 * so we don't need to worry about it going away. 1296 * so we don't need to worry about it going away.
1297 */ 1297 */
1298 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1298 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1299 { 1299 {
1300 struct task_struct *t; 1300 struct task_struct *t;
1301 1301
1302 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1302 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1303 rnp->n_balk_exp_gp_tasks++; 1303 rnp->n_balk_exp_gp_tasks++;
1304 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1304 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1305 return; 1305 return;
1306 } 1306 }
1307 if (rnp->exp_tasks != NULL || 1307 if (rnp->exp_tasks != NULL ||
1308 (rnp->gp_tasks != NULL && 1308 (rnp->gp_tasks != NULL &&
1309 rnp->boost_tasks == NULL && 1309 rnp->boost_tasks == NULL &&
1310 rnp->qsmask == 0 && 1310 rnp->qsmask == 0 &&
1311 ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1311 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1312 if (rnp->exp_tasks == NULL) 1312 if (rnp->exp_tasks == NULL)
1313 rnp->boost_tasks = rnp->gp_tasks; 1313 rnp->boost_tasks = rnp->gp_tasks;
1314 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1314 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1315 t = rnp->boost_kthread_task; 1315 t = rnp->boost_kthread_task;
1316 if (t != NULL) 1316 if (t != NULL)
1317 wake_up_process(t); 1317 wake_up_process(t);
1318 } else { 1318 } else {
1319 rcu_initiate_boost_trace(rnp); 1319 rcu_initiate_boost_trace(rnp);
1320 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1320 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1321 } 1321 }
1322 } 1322 }
1323 1323
1324 /* 1324 /*
1325 * Wake up the per-CPU kthread to invoke RCU callbacks. 1325 * Wake up the per-CPU kthread to invoke RCU callbacks.
1326 */ 1326 */
1327 static void invoke_rcu_callbacks_kthread(void) 1327 static void invoke_rcu_callbacks_kthread(void)
1328 { 1328 {
1329 unsigned long flags; 1329 unsigned long flags;
1330 1330
1331 local_irq_save(flags); 1331 local_irq_save(flags);
1332 __this_cpu_write(rcu_cpu_has_work, 1); 1332 __this_cpu_write(rcu_cpu_has_work, 1);
1333 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1333 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1334 current != __this_cpu_read(rcu_cpu_kthread_task)) 1334 current != __this_cpu_read(rcu_cpu_kthread_task))
1335 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); 1335 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1336 local_irq_restore(flags); 1336 local_irq_restore(flags);
1337 } 1337 }
1338 1338
1339 /* 1339 /*
1340 * Set the affinity of the boost kthread. The CPU-hotplug locks are 1340 * Set the affinity of the boost kthread. The CPU-hotplug locks are
1341 * held, so no one should be messing with the existence of the boost 1341 * held, so no one should be messing with the existence of the boost
1342 * kthread. 1342 * kthread.
1343 */ 1343 */
1344 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, 1344 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
1345 cpumask_var_t cm) 1345 cpumask_var_t cm)
1346 { 1346 {
1347 struct task_struct *t; 1347 struct task_struct *t;
1348 1348
1349 t = rnp->boost_kthread_task; 1349 t = rnp->boost_kthread_task;
1350 if (t != NULL) 1350 if (t != NULL)
1351 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); 1351 set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
1352 } 1352 }
1353 1353
1354 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1354 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1355 1355
1356 /* 1356 /*
1357 * Do priority-boost accounting for the start of a new grace period. 1357 * Do priority-boost accounting for the start of a new grace period.
1358 */ 1358 */
1359 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1359 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1360 { 1360 {
1361 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 1361 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1362 } 1362 }
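A hedged worked example of the RCU_BOOST_DELAY_JIFFIES arithmetic above; the CONFIG_RCU_BOOST_DELAY and HZ values are illustrative, not mandated by this commit:

	/*
	 * Illustrative values only: with CONFIG_RCU_BOOST_DELAY = 500 (ms)
	 * and HZ = 250, RCU_BOOST_DELAY_JIFFIES evaluates to
	 * DIV_ROUND_UP(500 * 250, 1000) = DIV_ROUND_UP(125000, 1000) = 125,
	 * so rcu_preempt_boost_start_gp() arms ->boost_time about 125 jiffies
	 * (roughly 500 ms) after the grace period starts.  With HZ = 1000 the
	 * same config yields 500 jiffies, still about 500 ms of real time.
	 */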
1363 1363
1364 /* 1364 /*
1365 * Create an RCU-boost kthread for the specified node if one does not 1365 * Create an RCU-boost kthread for the specified node if one does not
1366 * already exist. We only create this kthread for preemptible RCU. 1366 * already exist. We only create this kthread for preemptible RCU.
1367 * Returns zero if all is well, a negated errno otherwise. 1367 * Returns zero if all is well, a negated errno otherwise.
1368 */ 1368 */
1369 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1369 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1370 struct rcu_node *rnp, 1370 struct rcu_node *rnp,
1371 int rnp_index) 1371 int rnp_index)
1372 { 1372 {
1373 unsigned long flags; 1373 unsigned long flags;
1374 struct sched_param sp; 1374 struct sched_param sp;
1375 struct task_struct *t; 1375 struct task_struct *t;
1376 1376
1377 if (&rcu_preempt_state != rsp) 1377 if (&rcu_preempt_state != rsp)
1378 return 0; 1378 return 0;
1379 rsp->boost = 1; 1379 rsp->boost = 1;
1380 if (rnp->boost_kthread_task != NULL) 1380 if (rnp->boost_kthread_task != NULL)
1381 return 0; 1381 return 0;
1382 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1382 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1383 "rcub/%d", rnp_index); 1383 "rcub/%d", rnp_index);
1384 if (IS_ERR(t)) 1384 if (IS_ERR(t))
1385 return PTR_ERR(t); 1385 return PTR_ERR(t);
1386 raw_spin_lock_irqsave(&rnp->lock, flags); 1386 raw_spin_lock_irqsave(&rnp->lock, flags);
1387 rnp->boost_kthread_task = t; 1387 rnp->boost_kthread_task = t;
1388 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1388 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1389 sp.sched_priority = RCU_BOOST_PRIO; 1389 sp.sched_priority = RCU_BOOST_PRIO;
1390 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1390 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1391 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1391 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1392 return 0; 1392 return 0;
1393 } 1393 }
1394 1394
1395 #ifdef CONFIG_HOTPLUG_CPU 1395 #ifdef CONFIG_HOTPLUG_CPU
1396 1396
1397 /* 1397 /*
1398 * Stop RCU's per-CPU kthread when its CPU goes offline. 1398 * Stop RCU's per-CPU kthread when its CPU goes offline.
1399 */ 1399 */
1400 static void rcu_stop_cpu_kthread(int cpu) 1400 static void rcu_stop_cpu_kthread(int cpu)
1401 { 1401 {
1402 struct task_struct *t; 1402 struct task_struct *t;
1403 1403
1404 /* Stop the CPU's kthread. */ 1404 /* Stop the CPU's kthread. */
1405 t = per_cpu(rcu_cpu_kthread_task, cpu); 1405 t = per_cpu(rcu_cpu_kthread_task, cpu);
1406 if (t != NULL) { 1406 if (t != NULL) {
1407 per_cpu(rcu_cpu_kthread_task, cpu) = NULL; 1407 per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
1408 kthread_stop(t); 1408 kthread_stop(t);
1409 } 1409 }
1410 } 1410 }
1411 1411
1412 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1412 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1413 1413
1414 static void rcu_kthread_do_work(void) 1414 static void rcu_kthread_do_work(void)
1415 { 1415 {
1416 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); 1416 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
1417 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1417 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1418 rcu_preempt_do_callbacks(); 1418 rcu_preempt_do_callbacks();
1419 } 1419 }
1420 1420
1421 /* 1421 /*
1422 * Wake up the specified per-rcu_node-structure kthread. 1422 * Wake up the specified per-rcu_node-structure kthread.
1423 * Because the per-rcu_node kthreads are immortal, we don't need 1423 * Because the per-rcu_node kthreads are immortal, we don't need
1424 * to do anything to keep them alive. 1424 * to do anything to keep them alive.
1425 */ 1425 */
1426 static void invoke_rcu_node_kthread(struct rcu_node *rnp) 1426 static void invoke_rcu_node_kthread(struct rcu_node *rnp)
1427 { 1427 {
1428 struct task_struct *t; 1428 struct task_struct *t;
1429 1429
1430 t = rnp->node_kthread_task; 1430 t = rnp->node_kthread_task;
1431 if (t != NULL) 1431 if (t != NULL)
1432 wake_up_process(t); 1432 wake_up_process(t);
1433 } 1433 }
1434 1434
1435 /* 1435 /*
1436 * Set the specified CPU's kthread to run RT or not, as specified by 1436 * Set the specified CPU's kthread to run RT or not, as specified by
1437 * the to_rt argument. The CPU-hotplug locks are held, so the task 1437 * the to_rt argument. The CPU-hotplug locks are held, so the task
1438 * is not going away. 1438 * is not going away.
1439 */ 1439 */
1440 static void rcu_cpu_kthread_setrt(int cpu, int to_rt) 1440 static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1441 { 1441 {
1442 int policy; 1442 int policy;
1443 struct sched_param sp; 1443 struct sched_param sp;
1444 struct task_struct *t; 1444 struct task_struct *t;
1445 1445
1446 t = per_cpu(rcu_cpu_kthread_task, cpu); 1446 t = per_cpu(rcu_cpu_kthread_task, cpu);
1447 if (t == NULL) 1447 if (t == NULL)
1448 return; 1448 return;
1449 if (to_rt) { 1449 if (to_rt) {
1450 policy = SCHED_FIFO; 1450 policy = SCHED_FIFO;
1451 sp.sched_priority = RCU_KTHREAD_PRIO; 1451 sp.sched_priority = RCU_KTHREAD_PRIO;
1452 } else { 1452 } else {
1453 policy = SCHED_NORMAL; 1453 policy = SCHED_NORMAL;
1454 sp.sched_priority = 0; 1454 sp.sched_priority = 0;
1455 } 1455 }
1456 sched_setscheduler_nocheck(t, policy, &sp); 1456 sched_setscheduler_nocheck(t, policy, &sp);
1457 } 1457 }
1458 1458
1459 /* 1459 /*
1460 * Timer handler to initiate the waking up of per-CPU kthreads that 1460 * Timer handler to initiate the waking up of per-CPU kthreads that
1461 * have yielded the CPU due to excess numbers of RCU callbacks. 1461 * have yielded the CPU due to excess numbers of RCU callbacks.
1462 * We wake up the per-rcu_node kthread, which in turn will wake up 1462 * We wake up the per-rcu_node kthread, which in turn will wake up
1463 * the booster kthread. 1463 * the booster kthread.
1464 */ 1464 */
1465 static void rcu_cpu_kthread_timer(unsigned long arg) 1465 static void rcu_cpu_kthread_timer(unsigned long arg)
1466 { 1466 {
1467 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); 1467 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
1468 struct rcu_node *rnp = rdp->mynode; 1468 struct rcu_node *rnp = rdp->mynode;
1469 1469
1470 atomic_or(rdp->grpmask, &rnp->wakemask); 1470 atomic_or(rdp->grpmask, &rnp->wakemask);
1471 invoke_rcu_node_kthread(rnp); 1471 invoke_rcu_node_kthread(rnp);
1472 } 1472 }
1473 1473
1474 /* 1474 /*
1475 * Drop to non-real-time priority and yield, but only after posting a 1475 * Drop to non-real-time priority and yield, but only after posting a
1476 * timer that will cause us to regain our real-time priority if we 1476 * timer that will cause us to regain our real-time priority if we
1477 * remain preempted. Either way, we restore our real-time priority 1477 * remain preempted. Either way, we restore our real-time priority
1478 * before returning. 1478 * before returning.
1479 */ 1479 */
1480 static void rcu_yield(void (*f)(unsigned long), unsigned long arg) 1480 static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1481 { 1481 {
1482 struct sched_param sp; 1482 struct sched_param sp;
1483 struct timer_list yield_timer; 1483 struct timer_list yield_timer;
1484 int prio = current->rt_priority; 1484 int prio = current->rt_priority;
1485 1485
1486 setup_timer_on_stack(&yield_timer, f, arg); 1486 setup_timer_on_stack(&yield_timer, f, arg);
1487 mod_timer(&yield_timer, jiffies + 2); 1487 mod_timer(&yield_timer, jiffies + 2);
1488 sp.sched_priority = 0; 1488 sp.sched_priority = 0;
1489 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); 1489 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1490 set_user_nice(current, 19); 1490 set_user_nice(current, 19);
1491 schedule(); 1491 schedule();
1492 set_user_nice(current, 0); 1492 set_user_nice(current, 0);
1493 sp.sched_priority = prio; 1493 sp.sched_priority = prio;
1494 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1494 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1495 del_timer(&yield_timer); 1495 del_timer(&yield_timer);
1496 } 1496 }
1497 1497
1498 /* 1498 /*
1499 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. 1499 * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
1500 * This can happen while the corresponding CPU is either coming online 1500 * This can happen while the corresponding CPU is either coming online
1501 * or going offline. We cannot wait until the CPU is fully online 1501 * or going offline. We cannot wait until the CPU is fully online
1502 * before starting the kthread, because the various notifier functions 1502 * before starting the kthread, because the various notifier functions
1503 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until 1503 * can wait for RCU grace periods. So we park rcu_cpu_kthread() until
1504 * the corresponding CPU is online. 1504 * the corresponding CPU is online.
1505 * 1505 *
1506 * Return 1 if the kthread needs to stop, 0 otherwise. 1506 * Return 1 if the kthread needs to stop, 0 otherwise.
1507 * 1507 *
1508 * Caller must disable bh. This function can momentarily enable it. 1508 * Caller must disable bh. This function can momentarily enable it.
1509 */ 1509 */
1510 static int rcu_cpu_kthread_should_stop(int cpu) 1510 static int rcu_cpu_kthread_should_stop(int cpu)
1511 { 1511 {
1512 while (cpu_is_offline(cpu) || 1512 while (cpu_is_offline(cpu) ||
1513 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) || 1513 !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
1514 smp_processor_id() != cpu) { 1514 smp_processor_id() != cpu) {
1515 if (kthread_should_stop()) 1515 if (kthread_should_stop())
1516 return 1; 1516 return 1;
1517 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1517 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1518 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); 1518 per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
1519 local_bh_enable(); 1519 local_bh_enable();
1520 schedule_timeout_uninterruptible(1); 1520 schedule_timeout_uninterruptible(1);
1521 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu))) 1521 if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
1522 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 1522 set_cpus_allowed_ptr(current, cpumask_of(cpu));
1523 local_bh_disable(); 1523 local_bh_disable();
1524 } 1524 }
1525 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1525 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1526 return 0; 1526 return 0;
1527 } 1527 }
1528 1528
1529 /* 1529 /*
1530 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1530 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1531 * RCU softirq used in flavors and configurations of RCU that do not 1531 * RCU softirq used in flavors and configurations of RCU that do not
1532 * support RCU priority boosting. 1532 * support RCU priority boosting.
1533 */ 1533 */
1534 static int rcu_cpu_kthread(void *arg) 1534 static int rcu_cpu_kthread(void *arg)
1535 { 1535 {
1536 int cpu = (int)(long)arg; 1536 int cpu = (int)(long)arg;
1537 unsigned long flags; 1537 unsigned long flags;
1538 int spincnt = 0; 1538 int spincnt = 0;
1539 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); 1539 unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
1540 char work; 1540 char work;
1541 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1541 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1542 1542
1543 trace_rcu_utilization("Start CPU kthread@init"); 1543 trace_rcu_utilization("Start CPU kthread@init");
1544 for (;;) { 1544 for (;;) {
1545 *statusp = RCU_KTHREAD_WAITING; 1545 *statusp = RCU_KTHREAD_WAITING;
1546 trace_rcu_utilization("End CPU kthread@rcu_wait"); 1546 trace_rcu_utilization("End CPU kthread@rcu_wait");
1547 rcu_wait(*workp != 0 || kthread_should_stop()); 1547 rcu_wait(*workp != 0 || kthread_should_stop());
1548 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1548 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1549 local_bh_disable(); 1549 local_bh_disable();
1550 if (rcu_cpu_kthread_should_stop(cpu)) { 1550 if (rcu_cpu_kthread_should_stop(cpu)) {
1551 local_bh_enable(); 1551 local_bh_enable();
1552 break; 1552 break;
1553 } 1553 }
1554 *statusp = RCU_KTHREAD_RUNNING; 1554 *statusp = RCU_KTHREAD_RUNNING;
1555 per_cpu(rcu_cpu_kthread_loops, cpu)++; 1555 per_cpu(rcu_cpu_kthread_loops, cpu)++;
1556 local_irq_save(flags); 1556 local_irq_save(flags);
1557 work = *workp; 1557 work = *workp;
1558 *workp = 0; 1558 *workp = 0;
1559 local_irq_restore(flags); 1559 local_irq_restore(flags);
1560 if (work) 1560 if (work)
1561 rcu_kthread_do_work(); 1561 rcu_kthread_do_work();
1562 local_bh_enable(); 1562 local_bh_enable();
1563 if (*workp != 0) 1563 if (*workp != 0)
1564 spincnt++; 1564 spincnt++;
1565 else 1565 else
1566 spincnt = 0; 1566 spincnt = 0;
1567 if (spincnt > 10) { 1567 if (spincnt > 10) {
1568 *statusp = RCU_KTHREAD_YIELDING; 1568 *statusp = RCU_KTHREAD_YIELDING;
1569 trace_rcu_utilization("End CPU kthread@rcu_yield"); 1569 trace_rcu_utilization("End CPU kthread@rcu_yield");
1570 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); 1570 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1571 trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1571 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1572 spincnt = 0; 1572 spincnt = 0;
1573 } 1573 }
1574 } 1574 }
1575 *statusp = RCU_KTHREAD_STOPPED; 1575 *statusp = RCU_KTHREAD_STOPPED;
1576 trace_rcu_utilization("End CPU kthread@term"); 1576 trace_rcu_utilization("End CPU kthread@term");
1577 return 0; 1577 return 0;
1578 } 1578 }
1579 1579
1580 /* 1580 /*
1581 * Spawn a per-CPU kthread, setting up affinity and priority. 1581 * Spawn a per-CPU kthread, setting up affinity and priority.
1582 * Because the CPU hotplug lock is held, no other CPU will be attempting 1582 * Because the CPU hotplug lock is held, no other CPU will be attempting
1583 * to manipulate rcu_cpu_kthread_task. There might be another CPU 1583 * to manipulate rcu_cpu_kthread_task. There might be another CPU
1584 * attempting to access it during boot, but the locking in kthread_bind() 1584 * attempting to access it during boot, but the locking in kthread_bind()
1585 * will enforce sufficient ordering. 1585 * will enforce sufficient ordering.
1586 * 1586 *
1587 * Please note that we cannot simply refuse to wake up the per-CPU 1587 * Please note that we cannot simply refuse to wake up the per-CPU
1588 * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, 1588 * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
1589 * which can result in softlockup complaints if the task ends up being 1589 * which can result in softlockup complaints if the task ends up being
1590 * idle for more than a couple of minutes. 1590 * idle for more than a couple of minutes.
1591 * 1591 *
1592 * However, please note also that we cannot bind the per-CPU kthread to its 1592 * However, please note also that we cannot bind the per-CPU kthread to its
1593 * CPU until that CPU is fully online. We also cannot wait until the 1593 * CPU until that CPU is fully online. We also cannot wait until the
1594 * CPU is fully online before we create its per-CPU kthread, as this would 1594 * CPU is fully online before we create its per-CPU kthread, as this would
1595 * deadlock the system when CPU notifiers tried waiting for grace 1595 * deadlock the system when CPU notifiers tried waiting for grace
1596 * periods. So we bind the per-CPU kthread to its CPU only if the CPU 1596 * periods. So we bind the per-CPU kthread to its CPU only if the CPU
1597 * is online. If its CPU is not yet fully online, then the code in 1597 * is online. If its CPU is not yet fully online, then the code in
1598 * rcu_cpu_kthread() will wait until it is fully online, and then do 1598 * rcu_cpu_kthread() will wait until it is fully online, and then do
1599 * the binding. 1599 * the binding.
1600 */ 1600 */
1601 static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) 1601 static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1602 { 1602 {
1603 struct sched_param sp; 1603 struct sched_param sp;
1604 struct task_struct *t; 1604 struct task_struct *t;
1605 1605
1606 if (!rcu_scheduler_fully_active || 1606 if (!rcu_scheduler_fully_active ||
1607 per_cpu(rcu_cpu_kthread_task, cpu) != NULL) 1607 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1608 return 0; 1608 return 0;
1609 t = kthread_create_on_node(rcu_cpu_kthread, 1609 t = kthread_create_on_node(rcu_cpu_kthread,
1610 (void *)(long)cpu, 1610 (void *)(long)cpu,
1611 cpu_to_node(cpu), 1611 cpu_to_node(cpu),
1612 "rcuc/%d", cpu); 1612 "rcuc/%d", cpu);
1613 if (IS_ERR(t)) 1613 if (IS_ERR(t))
1614 return PTR_ERR(t); 1614 return PTR_ERR(t);
1615 if (cpu_online(cpu)) 1615 if (cpu_online(cpu))
1616 kthread_bind(t, cpu); 1616 kthread_bind(t, cpu);
1617 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; 1617 per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
1618 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); 1618 WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
1619 sp.sched_priority = RCU_KTHREAD_PRIO; 1619 sp.sched_priority = RCU_KTHREAD_PRIO;
1620 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1620 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1621 per_cpu(rcu_cpu_kthread_task, cpu) = t; 1621 per_cpu(rcu_cpu_kthread_task, cpu) = t;
1622 wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ 1622 wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
1623 return 0; 1623 return 0;
1624 } 1624 }
1625 1625
1626 /* 1626 /*
1627 * Per-rcu_node kthread, which is in charge of waking up the per-CPU 1627 * Per-rcu_node kthread, which is in charge of waking up the per-CPU
1628 * kthreads when needed. We ignore requests to wake up kthreads 1628 * kthreads when needed. We ignore requests to wake up kthreads
1629 * for offline CPUs, which is OK because force_quiescent_state() 1629 * for offline CPUs, which is OK because force_quiescent_state()
1630 * takes care of this case. 1630 * takes care of this case.
1631 */ 1631 */
1632 static int rcu_node_kthread(void *arg) 1632 static int rcu_node_kthread(void *arg)
1633 { 1633 {
1634 int cpu; 1634 int cpu;
1635 unsigned long flags; 1635 unsigned long flags;
1636 unsigned long mask; 1636 unsigned long mask;
1637 struct rcu_node *rnp = (struct rcu_node *)arg; 1637 struct rcu_node *rnp = (struct rcu_node *)arg;
1638 struct sched_param sp; 1638 struct sched_param sp;
1639 struct task_struct *t; 1639 struct task_struct *t;
1640 1640
1641 for (;;) { 1641 for (;;) {
1642 rnp->node_kthread_status = RCU_KTHREAD_WAITING; 1642 rnp->node_kthread_status = RCU_KTHREAD_WAITING;
1643 rcu_wait(atomic_read(&rnp->wakemask) != 0); 1643 rcu_wait(atomic_read(&rnp->wakemask) != 0);
1644 rnp->node_kthread_status = RCU_KTHREAD_RUNNING; 1644 rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
1645 raw_spin_lock_irqsave(&rnp->lock, flags); 1645 raw_spin_lock_irqsave(&rnp->lock, flags);
1646 mask = atomic_xchg(&rnp->wakemask, 0); 1646 mask = atomic_xchg(&rnp->wakemask, 0);
1647 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ 1647 rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
1648 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { 1648 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
1649 if ((mask & 0x1) == 0) 1649 if ((mask & 0x1) == 0)
1650 continue; 1650 continue;
1651 preempt_disable(); 1651 preempt_disable();
1652 t = per_cpu(rcu_cpu_kthread_task, cpu); 1652 t = per_cpu(rcu_cpu_kthread_task, cpu);
1653 if (!cpu_online(cpu) || t == NULL) { 1653 if (!cpu_online(cpu) || t == NULL) {
1654 preempt_enable(); 1654 preempt_enable();
1655 continue; 1655 continue;
1656 } 1656 }
1657 per_cpu(rcu_cpu_has_work, cpu) = 1; 1657 per_cpu(rcu_cpu_has_work, cpu) = 1;
1658 sp.sched_priority = RCU_KTHREAD_PRIO; 1658 sp.sched_priority = RCU_KTHREAD_PRIO;
1659 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1659 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1660 preempt_enable(); 1660 preempt_enable();
1661 } 1661 }
1662 } 1662 }
1663 /* NOTREACHED */ 1663 /* NOTREACHED */
1664 rnp->node_kthread_status = RCU_KTHREAD_STOPPED; 1664 rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
1665 return 0; 1665 return 0;
1666 } 1666 }
1667 1667
1668 /* 1668 /*
1669 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1669 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1670 * served by the rcu_node in question. The CPU hotplug lock is still 1670 * served by the rcu_node in question. The CPU hotplug lock is still
1671 * held, so the value of rnp->qsmaskinit will be stable. 1671 * held, so the value of rnp->qsmaskinit will be stable.
1672 * 1672 *
1673 * We don't include outgoingcpu in the affinity set; use -1 if there is 1673 * We don't include outgoingcpu in the affinity set; use -1 if there is
1674 * no outgoing CPU. If there are no CPUs left in the affinity set, 1674 * no outgoing CPU. If there are no CPUs left in the affinity set,
1675 * this function allows the kthread to execute on any CPU. 1675 * this function allows the kthread to execute on any CPU.
1676 */ 1676 */
1677 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1677 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1678 { 1678 {
1679 cpumask_var_t cm; 1679 cpumask_var_t cm;
1680 int cpu; 1680 int cpu;
1681 unsigned long mask = rnp->qsmaskinit; 1681 unsigned long mask = rnp->qsmaskinit;
1682 1682
1683 if (rnp->node_kthread_task == NULL) 1683 if (rnp->node_kthread_task == NULL)
1684 return; 1684 return;
1685 if (!alloc_cpumask_var(&cm, GFP_KERNEL)) 1685 if (!alloc_cpumask_var(&cm, GFP_KERNEL))
1686 return; 1686 return;
1687 cpumask_clear(cm); 1687 cpumask_clear(cm);
1688 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1688 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1689 if ((mask & 0x1) && cpu != outgoingcpu) 1689 if ((mask & 0x1) && cpu != outgoingcpu)
1690 cpumask_set_cpu(cpu, cm); 1690 cpumask_set_cpu(cpu, cm);
1691 if (cpumask_weight(cm) == 0) { 1691 if (cpumask_weight(cm) == 0) {
1692 cpumask_setall(cm); 1692 cpumask_setall(cm);
1693 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) 1693 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1694 cpumask_clear_cpu(cpu, cm); 1694 cpumask_clear_cpu(cpu, cm);
1695 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1695 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1696 } 1696 }
1697 set_cpus_allowed_ptr(rnp->node_kthread_task, cm); 1697 set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
1698 rcu_boost_kthread_setaffinity(rnp, cm); 1698 rcu_boost_kthread_setaffinity(rnp, cm);
1699 free_cpumask_var(cm); 1699 free_cpumask_var(cm);
1700 } 1700 }
1701 1701
1702 /* 1702 /*
1703 * Spawn a per-rcu_node kthread, setting priority and affinity. 1703 * Spawn a per-rcu_node kthread, setting priority and affinity.
1704 * Called during boot before online/offline can happen, or, if 1704 * Called during boot before online/offline can happen, or, if
1705 * during runtime, with the main CPU-hotplug locks held. So only 1705 * during runtime, with the main CPU-hotplug locks held. So only
1706 * one of these can be executing at a time. 1706 * one of these can be executing at a time.
1707 */ 1707 */
1708 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, 1708 static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1709 struct rcu_node *rnp) 1709 struct rcu_node *rnp)
1710 { 1710 {
1711 unsigned long flags; 1711 unsigned long flags;
1712 int rnp_index = rnp - &rsp->node[0]; 1712 int rnp_index = rnp - &rsp->node[0];
1713 struct sched_param sp; 1713 struct sched_param sp;
1714 struct task_struct *t; 1714 struct task_struct *t;
1715 1715
1716 if (!rcu_scheduler_fully_active || 1716 if (!rcu_scheduler_fully_active ||
1717 rnp->qsmaskinit == 0) 1717 rnp->qsmaskinit == 0)
1718 return 0; 1718 return 0;
1719 if (rnp->node_kthread_task == NULL) { 1719 if (rnp->node_kthread_task == NULL) {
1720 t = kthread_create(rcu_node_kthread, (void *)rnp, 1720 t = kthread_create(rcu_node_kthread, (void *)rnp,
1721 "rcun/%d", rnp_index); 1721 "rcun/%d", rnp_index);
1722 if (IS_ERR(t)) 1722 if (IS_ERR(t))
1723 return PTR_ERR(t); 1723 return PTR_ERR(t);
1724 raw_spin_lock_irqsave(&rnp->lock, flags); 1724 raw_spin_lock_irqsave(&rnp->lock, flags);
1725 rnp->node_kthread_task = t; 1725 rnp->node_kthread_task = t;
1726 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1726 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1727 sp.sched_priority = 99; 1727 sp.sched_priority = 99;
1728 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1728 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1729 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1729 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1730 } 1730 }
1731 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); 1731 return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
1732 } 1732 }
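The spawn path above creates the node kthread stopped, publishes it under rnp->lock, raises it to SCHED_FIFO priority 99, and only then wakes it so that it parks at real-time priority. A rough user-space analogue of that create/boost/wake ordering using POSIX threads is sketched below (build with -pthread); the semaphore merely models wake_up_process(), and the priority change needs root or CAP_SYS_NICE to succeed.

/* User-space analogue of the create/boost/wake pattern above; names and
 * the priority value mirror the kernel code but are otherwise arbitrary. */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t go;			/* stands in for wake_up_process() */

static void *node_thread(void *arg)
{
	sem_wait(&go);			/* park until "woken", like the kthread */
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct sched_param sp = { .sched_priority = 99 };

	sem_init(&go, 0, 0);
	if (pthread_create(&t, NULL, node_thread, NULL) != 0)
		return 1;
	/* Raise the priority before letting the thread do real work. */
	if (pthread_setschedparam(t, SCHED_FIFO, &sp) != 0)
		fprintf(stderr, "SCHED_FIFO needs privileges; continuing\n");
	sem_post(&go);			/* the "wake_up_process()" step */
	pthread_join(t, NULL);
	return 0;
}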
1733 1733
1734 /* 1734 /*
1735 * Spawn all kthreads -- called as soon as the scheduler is running. 1735 * Spawn all kthreads -- called as soon as the scheduler is running.
1736 */ 1736 */
1737 static int __init rcu_spawn_kthreads(void) 1737 static int __init rcu_spawn_kthreads(void)
1738 { 1738 {
1739 int cpu; 1739 int cpu;
1740 struct rcu_node *rnp; 1740 struct rcu_node *rnp;
1741 1741
1742 rcu_scheduler_fully_active = 1; 1742 rcu_scheduler_fully_active = 1;
1743 for_each_possible_cpu(cpu) { 1743 for_each_possible_cpu(cpu) {
1744 per_cpu(rcu_cpu_has_work, cpu) = 0; 1744 per_cpu(rcu_cpu_has_work, cpu) = 0;
1745 if (cpu_online(cpu)) 1745 if (cpu_online(cpu))
1746 (void)rcu_spawn_one_cpu_kthread(cpu); 1746 (void)rcu_spawn_one_cpu_kthread(cpu);
1747 } 1747 }
1748 rnp = rcu_get_root(rcu_state); 1748 rnp = rcu_get_root(rcu_state);
1749 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1749 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1750 if (NUM_RCU_NODES > 1) { 1750 if (NUM_RCU_NODES > 1) {
1751 rcu_for_each_leaf_node(rcu_state, rnp) 1751 rcu_for_each_leaf_node(rcu_state, rnp)
1752 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1752 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1753 } 1753 }
1754 return 0; 1754 return 0;
1755 } 1755 }
1756 early_initcall(rcu_spawn_kthreads); 1756 early_initcall(rcu_spawn_kthreads);
1757 1757
1758 static void __cpuinit rcu_prepare_kthreads(int cpu) 1758 static void __cpuinit rcu_prepare_kthreads(int cpu)
1759 { 1759 {
1760 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1760 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1761 struct rcu_node *rnp = rdp->mynode; 1761 struct rcu_node *rnp = rdp->mynode;
1762 1762
1763 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1763 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1764 if (rcu_scheduler_fully_active) { 1764 if (rcu_scheduler_fully_active) {
1765 (void)rcu_spawn_one_cpu_kthread(cpu); 1765 (void)rcu_spawn_one_cpu_kthread(cpu);
1766 if (rnp->node_kthread_task == NULL) 1766 if (rnp->node_kthread_task == NULL)
1767 (void)rcu_spawn_one_node_kthread(rcu_state, rnp); 1767 (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
1768 } 1768 }
1769 } 1769 }
1770 1770
1771 #else /* #ifdef CONFIG_RCU_BOOST */ 1771 #else /* #ifdef CONFIG_RCU_BOOST */
1772 1772
1773 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1773 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1774 { 1774 {
1775 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1775 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1776 } 1776 }
1777 1777
1778 static void invoke_rcu_callbacks_kthread(void) 1778 static void invoke_rcu_callbacks_kthread(void)
1779 { 1779 {
1780 WARN_ON_ONCE(1); 1780 WARN_ON_ONCE(1);
1781 } 1781 }
1782 1782
1783 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1783 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1784 { 1784 {
1785 } 1785 }
1786 1786
1787 #ifdef CONFIG_HOTPLUG_CPU 1787 #ifdef CONFIG_HOTPLUG_CPU
1788 1788
1789 static void rcu_stop_cpu_kthread(int cpu) 1789 static void rcu_stop_cpu_kthread(int cpu)
1790 { 1790 {
1791 } 1791 }
1792 1792
1793 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1793 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1794 1794
1795 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1795 static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1796 { 1796 {
1797 } 1797 }
1798 1798
1799 static void rcu_cpu_kthread_setrt(int cpu, int to_rt) 1799 static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
1800 { 1800 {
1801 } 1801 }
1802 1802
1803 static int __init rcu_scheduler_really_started(void) 1803 static int __init rcu_scheduler_really_started(void)
1804 { 1804 {
1805 rcu_scheduler_fully_active = 1; 1805 rcu_scheduler_fully_active = 1;
1806 return 0; 1806 return 0;
1807 } 1807 }
1808 early_initcall(rcu_scheduler_really_started); 1808 early_initcall(rcu_scheduler_really_started);
1809 1809
1810 static void __cpuinit rcu_prepare_kthreads(int cpu) 1810 static void __cpuinit rcu_prepare_kthreads(int cpu)
1811 { 1811 {
1812 } 1812 }
1813 1813
1814 #endif /* #else #ifdef CONFIG_RCU_BOOST */ 1814 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1815 1815
1816 #ifndef CONFIG_SMP 1816 #ifndef CONFIG_SMP
1817 1817
1818 void synchronize_sched_expedited(void) 1818 void synchronize_sched_expedited(void)
1819 { 1819 {
1820 cond_resched(); 1820 cond_resched();
1821 } 1821 }
1822 EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 1822 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1823 1823
1824 #else /* #ifndef CONFIG_SMP */ 1824 #else /* #ifndef CONFIG_SMP */
1825 1825
1826 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); 1826 static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
1827 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); 1827 static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1828 1828
1829 static int synchronize_sched_expedited_cpu_stop(void *data) 1829 static int synchronize_sched_expedited_cpu_stop(void *data)
1830 { 1830 {
1831 /* 1831 /*
1832 * There must be a full memory barrier on each affected CPU 1832 * There must be a full memory barrier on each affected CPU
1833 * between the time that try_stop_cpus() is called and the 1833 * between the time that try_stop_cpus() is called and the
1834 * time that it returns. 1834 * time that it returns.
1835 * 1835 *
1836 * In the current initial implementation of cpu_stop, the 1836 * In the current initial implementation of cpu_stop, the
1837 * above condition is already met when the control reaches 1837 * above condition is already met when the control reaches
1838 * this point and the following smp_mb() is not strictly 1838 * this point and the following smp_mb() is not strictly
1839 * necessary. Do smp_mb() anyway for documentation and 1839 * necessary. Do smp_mb() anyway for documentation and
1840 * robustness against future implementation changes. 1840 * robustness against future implementation changes.
1841 */ 1841 */
1842 smp_mb(); /* See above comment block. */ 1842 smp_mb(); /* See above comment block. */
1843 return 0; 1843 return 0;
1844 } 1844 }
1845 1845
1846 /* 1846 /*
1847 * Wait for an rcu-sched grace period to elapse, but use a "big hammer" 1847 * Wait for an rcu-sched grace period to elapse, but use a "big hammer"
1848 * approach to force the grace period to end quickly. This consumes 1848 * approach to force the grace period to end quickly. This consumes
1849 * significant time on all CPUs, and is thus not recommended for 1849 * significant time on all CPUs, and is thus not recommended for
1850 * any sort of common-case code. 1850 * any sort of common-case code.
1851 * 1851 *
1852 * Note that it is illegal to call this function while holding any 1852 * Note that it is illegal to call this function while holding any
1853 * lock that is acquired by a CPU-hotplug notifier. Failing to 1853 * lock that is acquired by a CPU-hotplug notifier. Failing to
1854 * observe this restriction will result in deadlock. 1854 * observe this restriction will result in deadlock.
1855 * 1855 *
1856 * This implementation can be thought of as an application of ticket 1856 * This implementation can be thought of as an application of ticket
1857 * locking to RCU, with sync_sched_expedited_started and 1857 * locking to RCU, with sync_sched_expedited_started and
1858 * sync_sched_expedited_done taking on the roles of the halves 1858 * sync_sched_expedited_done taking on the roles of the halves
1859 * of the ticket-lock word. Each task atomically increments 1859 * of the ticket-lock word. Each task atomically increments
1860 * sync_sched_expedited_started upon entry, snapshotting the old value, 1860 * sync_sched_expedited_started upon entry, snapshotting the old value,
1861 * then attempts to stop all the CPUs. If this succeeds, then each 1861 * then attempts to stop all the CPUs. If this succeeds, then each
1862 * CPU will have executed a context switch, resulting in an RCU-sched 1862 * CPU will have executed a context switch, resulting in an RCU-sched
1863 * grace period. We are then done, so we use atomic_cmpxchg() to 1863 * grace period. We are then done, so we use atomic_cmpxchg() to
1864 * update sync_sched_expedited_done to match our snapshot -- but 1864 * update sync_sched_expedited_done to match our snapshot -- but
1865 * only if someone else has not already advanced past our snapshot. 1865 * only if someone else has not already advanced past our snapshot.
1866 * 1866 *
1867 * On the other hand, if try_stop_cpus() fails, we check the value 1867 * On the other hand, if try_stop_cpus() fails, we check the value
1868 * of sync_sched_expedited_done. If it has advanced past our 1868 * of sync_sched_expedited_done. If it has advanced past our
1869 * initial snapshot, then someone else must have forced a grace period 1869 * initial snapshot, then someone else must have forced a grace period
1870 * some time after we took our snapshot. In this case, our work is 1870 * some time after we took our snapshot. In this case, our work is
1871 * done for us, and we can simply return. Otherwise, we try again, 1871 * done for us, and we can simply return. Otherwise, we try again,
1872 * but keep our initial snapshot for purposes of checking for someone 1872 * but keep our initial snapshot for purposes of checking for someone
1873 * doing our work for us. 1873 * doing our work for us.
1874 * 1874 *
1875 * If we fail too many times in a row, we fall back to synchronize_sched(). 1875 * If we fail too many times in a row, we fall back to synchronize_sched().
1876 */ 1876 */
1877 void synchronize_sched_expedited(void) 1877 void synchronize_sched_expedited(void)
1878 { 1878 {
1879 int firstsnap, s, snap, trycount = 0; 1879 int firstsnap, s, snap, trycount = 0;
1880 1880
1881 /* Note that atomic_inc_return() implies a full memory barrier. */ 1881 /* Note that atomic_inc_return() implies a full memory barrier. */
1882 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); 1882 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
1883 get_online_cpus(); 1883 get_online_cpus();
1884 1884
1885 /* 1885 /*
1886 * Each pass through the following loop attempts to force a 1886 * Each pass through the following loop attempts to force a
1887 * context switch on each CPU. 1887 * context switch on each CPU.
1888 */ 1888 */
1889 while (try_stop_cpus(cpu_online_mask, 1889 while (try_stop_cpus(cpu_online_mask,
1890 synchronize_sched_expedited_cpu_stop, 1890 synchronize_sched_expedited_cpu_stop,
1891 NULL) == -EAGAIN) { 1891 NULL) == -EAGAIN) {
1892 put_online_cpus(); 1892 put_online_cpus();
1893 1893
1894 /* No joy, try again later. Or just synchronize_sched(). */ 1894 /* No joy, try again later. Or just synchronize_sched(). */
1895 if (trycount++ < 10) 1895 if (trycount++ < 10)
1896 udelay(trycount * num_online_cpus()); 1896 udelay(trycount * num_online_cpus());
1897 else { 1897 else {
1898 synchronize_sched(); 1898 synchronize_sched();
1899 return; 1899 return;
1900 } 1900 }
1901 1901
1902 /* Check to see if someone else did our work for us. */ 1902 /* Check to see if someone else did our work for us. */
1903 s = atomic_read(&sync_sched_expedited_done); 1903 s = atomic_read(&sync_sched_expedited_done);
1904 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { 1904 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
1905 smp_mb(); /* ensure test happens before caller kfree */ 1905 smp_mb(); /* ensure test happens before caller kfree */
1906 return; 1906 return;
1907 } 1907 }
1908 1908
1909 /* 1909 /*
1910 * Refetching sync_sched_expedited_started allows later 1910 * Refetching sync_sched_expedited_started allows later
1911 * callers to piggyback on our grace period. We subtract 1911 * callers to piggyback on our grace period. We subtract
1912 * 1 to get the same token that the last incrementer got. 1912 * 1 to get the same token that the last incrementer got.
1913 * We retry after they started, so our grace period works 1913 * We retry after they started, so our grace period works
1914 * for them, and they started after our first try, so their 1914 * for them, and they started after our first try, so their
1915 * grace period works for us. 1915 * grace period works for us.
1916 */ 1916 */
1917 get_online_cpus(); 1917 get_online_cpus();
1918 snap = atomic_read(&sync_sched_expedited_started); 1918 snap = atomic_read(&sync_sched_expedited_started);
1919 smp_mb(); /* ensure read is before try_stop_cpus(). */ 1919 smp_mb(); /* ensure read is before try_stop_cpus(). */
1920 } 1920 }
1921 1921
1922 /* 1922 /*
1923 * Everyone up to our most recent fetch is covered by our grace 1923 * Everyone up to our most recent fetch is covered by our grace
1924 * period. Update the counter, but only if our work is still 1924 * period. Update the counter, but only if our work is still
1925 * relevant -- which it won't be if someone who started later 1925 * relevant -- which it won't be if someone who started later
1926 * than we did beat us to the punch. 1926 * than we did beat us to the punch.
1927 */ 1927 */
1928 do { 1928 do {
1929 s = atomic_read(&sync_sched_expedited_done); 1929 s = atomic_read(&sync_sched_expedited_done);
1930 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { 1930 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
1931 smp_mb(); /* ensure test happens before caller kfree */ 1931 smp_mb(); /* ensure test happens before caller kfree */
1932 break; 1932 break;
1933 } 1933 }
1934 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); 1934 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
1935 1935
1936 put_online_cpus(); 1936 put_online_cpus();
1937 } 1937 }
1938 EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 1938 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
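The comment block before synchronize_sched_expedited() describes the started/done counters as the two halves of a ticket-lock word. The stand-alone sketch below models just that bookkeeping with C11 atomics in user space: force_grace_period() stands in for try_stop_cpus() and always succeeds, and the retry/udelay/synchronize_sched() fallback is omitted, so only the snapshot and piggyback logic remains. All names here are illustrative, not the kernel's.

/* User-space model of the started/done "ticket" logic above. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint started = 0;
static atomic_uint done = 0;

/* Wraparound-safe "a is at or past b", in the spirit of UINT_CMP_GE(). */
static bool at_or_past(unsigned int a, unsigned int b)
{
	return (int)(a - b) >= 0;
}

static bool force_grace_period(void)
{
	return true;			/* pretend try_stop_cpus() succeeded */
}

static void expedited(void)
{
	unsigned int firstsnap, snap, s;

	/* Take a ticket; the increment's return value is our snapshot. */
	firstsnap = snap = atomic_fetch_add(&started, 1) + 1;
	while (!force_grace_period()) {
		s = atomic_load(&done);
		if (at_or_past(s, firstsnap))
			return;		/* someone else did our work for us */
		snap = atomic_load(&started);	/* let later callers piggyback */
	}
	/* Advance done to our snapshot unless a later caller already did. */
	do {
		s = atomic_load(&done);
		if (at_or_past(s, snap))
			break;
	} while (!atomic_compare_exchange_strong(&done, &s, snap));
}

int main(void)
{
	expedited();
	printf("started=%u done=%u\n", atomic_load(&started),
	       atomic_load(&done));	/* expect started=1 done=1 */
	return 0;
}

The wraparound-safe comparison mirrors the intent of UINT_CMP_GE(): a caller whose snapshot has already been passed by sync_sched_expedited_done knows that some other caller's grace period covered it and can return immediately.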
1939 1939
1940 #endif /* #else #ifndef CONFIG_SMP */ 1940 #endif /* #else #ifndef CONFIG_SMP */
1941 1941
1942 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1942 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1943 1943
1944 /* 1944 /*
1945 * Check to see if any future RCU-related work will need to be done 1945 * Check to see if any future RCU-related work will need to be done
1946 * by the current CPU, even if none need be done immediately, returning 1946 * by the current CPU, even if none need be done immediately, returning
1947 * 1 if so. This function is part of the RCU implementation; it is -not- 1947 * 1 if so. This function is part of the RCU implementation; it is -not-
1948 * an exported member of the RCU API. 1948 * an exported member of the RCU API.
1949 * 1949 *
1950 * Because we have preemptible RCU, just check whether this CPU needs 1950 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1951 * any flavor of RCU. Do not chew up lots of CPU cycles with preemption 1951 * any flavor of RCU.
1952 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
1953 */ 1952 */
1954 int rcu_needs_cpu(int cpu) 1953 int rcu_needs_cpu(int cpu)
1955 { 1954 {
1956 return rcu_cpu_has_callbacks(cpu); 1955 return rcu_cpu_has_callbacks(cpu);
1957 } 1956 }
1958 1957
1959 /* 1958 /*
1959 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
1960 */
1961 static void rcu_prepare_for_idle_init(int cpu)
1962 {
1963 }
1964
1965 /*
1966 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1967 * after it.
1968 */
1969 static void rcu_cleanup_after_idle(int cpu)
1970 {
1971 }
1972
1973 /*
1960 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, 1974 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1961 * is nothing. 1975 * is nothing.
1962 */ 1976 */
1963 static void rcu_prepare_for_idle(int cpu) 1977 static void rcu_prepare_for_idle(int cpu)
1964 { 1978 {
1965 } 1979 }
1966 1980
1967 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1981 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1968 1982
1969 #define RCU_NEEDS_CPU_FLUSHES 5 1983 #define RCU_NEEDS_CPU_FLUSHES 5 /* Allow for callback self-repost. */
1984 #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
1970 static DEFINE_PER_CPU(int, rcu_dyntick_drain); 1985 static DEFINE_PER_CPU(int, rcu_dyntick_drain);
1971 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 1986 static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
1987 static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
1988 static ktime_t rcu_idle_gp_wait;
1972 1989
1973 /* 1990 /*
1974 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no 1991 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
1975 * callbacks on this CPU, (2) this CPU has not yet attempted to enter 1992 * callbacks on this CPU, (2) this CPU has not yet attempted to enter
1976 * dyntick-idle mode, or (3) this CPU is in the process of attempting to 1993 * dyntick-idle mode, or (3) this CPU is in the process of attempting to
1977 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed 1994 * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
1978 * to enter dyntick-idle mode, we refuse to try to enter it. After all, 1995 * to enter dyntick-idle mode, we refuse to try to enter it. After all,
1979 * it is better to incur scheduling-clock interrupts than to spin 1996 * it is better to incur scheduling-clock interrupts than to spin
1980 * continuously for the same time duration! 1997 * continuously for the same time duration!
1981 */ 1998 */
1982 int rcu_needs_cpu(int cpu) 1999 int rcu_needs_cpu(int cpu)
1983 { 2000 {
1984 /* If no callbacks, RCU doesn't need the CPU. */ 2001 /* If no callbacks, RCU doesn't need the CPU. */
1985 if (!rcu_cpu_has_callbacks(cpu)) 2002 if (!rcu_cpu_has_callbacks(cpu))
1986 return 0; 2003 return 0;
1987 /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ 2004 /* Otherwise, RCU needs the CPU only if it recently tried and failed. */
1988 return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; 2005 return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies;
1989 } 2006 }
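In words: with RCU_FAST_NO_HZ, rcu_needs_cpu() keeps the scheduling-clock tick only when callbacks are pending and this CPU has already given up on dyntick-idle during the current jiffy (rcu_dyntick_holdoff == jiffies). A tiny stand-alone model of that decision follows; the names and jiffy values are illustrative only.

/* Model of the rcu_needs_cpu() decision above, with made-up inputs. */
#include <stdbool.h>
#include <stdio.h>

static bool needs_cpu(bool has_callbacks, unsigned long holdoff,
		      unsigned long now_jiffies)
{
	if (!has_callbacks)
		return false;			/* nothing pending at all */
	return holdoff == now_jiffies;		/* recently tried and failed */
}

int main(void)
{
	printf("%d\n", needs_cpu(true, 1000, 1000));	/* 1: keep the tick */
	printf("%d\n", needs_cpu(true,  999, 1000));	/* 0: may go dyntick-idle */
	return 0;
}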
1990 2007
1991 /* 2008 /*
2009 * Timer handler used to force the CPU to start pushing its remaining RCU
2010 * callbacks in the case where it entered dyntick-idle mode with callbacks
2011 * pending. The handler doesn't really need to do anything because the
2012 * real work is done upon re-entry to idle, or by the next scheduling-clock
2013 * interrupt should idle not be re-entered.
2014 */
2015 static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp)
2016 {
2017 trace_rcu_prep_idle("Timer");
2018 return HRTIMER_NORESTART;
2019 }
2020
2021 /*
2022 * Initialize the timer used to pull CPUs out of dyntick-idle mode.
2023 */
2024 static void rcu_prepare_for_idle_init(int cpu)
2025 {
2026 static int firsttime = 1;
2027 struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
2028
2029 hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2030 hrtp->function = rcu_idle_gp_timer_func;
2031 if (firsttime) {
2032 unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
2033
2034 rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
2035 firsttime = 0;
2036 }
2037 }
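rcu_idle_gp_wait is computed once: RCU_IDLE_GP_DELAY jiffies are converted to microseconds and then to a nanosecond-based ktime. A worked version of that arithmetic is sketched below, assuming HZ=250 purely for illustration; the kernel uses its configured HZ via jiffies_to_usecs().

/* Worked version of the one-time rcu_idle_gp_wait computation above,
 * with an assumed HZ value. */
#include <stdio.h>

#define HZ			250
#define RCU_IDLE_GP_DELAY	6	/* jiffies, roughly one grace period */

int main(void)
{
	unsigned int upj = RCU_IDLE_GP_DELAY * (1000000 / HZ);	/* microseconds */
	unsigned long long wait_ns = upj * 1000ULL;		/* ktime is in ns */

	printf("%d jiffies -> %u us -> %llu ns\n",
	       RCU_IDLE_GP_DELAY, upj, wait_ns);
	/* HZ=250: 6 jiffies -> 24000 us -> 24000000 ns */
	return 0;
}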
2038
2039 /*
2040 * Clean up for exit from idle. Because we are exiting from idle, there
2041 * is no longer any point to rcu_idle_gp_timer, so cancel it. This will
2042 * do nothing if this timer is not active, so just cancel it unconditionally.
2043 */
2044 static void rcu_cleanup_after_idle(int cpu)
2045 {
2046 hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu));
2047 }
2048
2049 /*
1992 * Check to see if any RCU-related work can be done by the current CPU, 2050 * Check to see if any RCU-related work can be done by the current CPU,
1993 * and if so, schedule a softirq to get it done. This function is part 2051 * and if so, schedule a softirq to get it done. This function is part
1994 * of the RCU implementation; it is -not- an exported member of the RCU API. 2052 * of the RCU implementation; it is -not- an exported member of the RCU API.
1995 * 2053 *
1996 * The idea is for the current CPU to clear out all work required by the 2054 * The idea is for the current CPU to clear out all work required by the
1997 * RCU core for the current grace period, so that this CPU can be permitted 2055 * RCU core for the current grace period, so that this CPU can be permitted
1998 * to enter dyntick-idle mode. In some cases, it will need to be awakened 2056 * to enter dyntick-idle mode. In some cases, it will need to be awakened
1999 * at the end of the grace period by whatever CPU ends the grace period. 2057 * at the end of the grace period by whatever CPU ends the grace period.
2000 * This allows CPUs to go dyntick-idle more quickly, and to reduce the 2058 * This allows CPUs to go dyntick-idle more quickly, and to reduce the
2001 * number of wakeups by a modest integer factor. 2059 * number of wakeups by a modest integer factor.
2002 * 2060 *
2003 * Because it is not legal to invoke rcu_process_callbacks() with irqs 2061 * Because it is not legal to invoke rcu_process_callbacks() with irqs
2004 * disabled, we do one pass of force_quiescent_state(), then do an 2062 * disabled, we do one pass of force_quiescent_state(), then do an
2005 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked 2063 * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
2006 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. 2064 * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
2007 * 2065 *
2008 * The caller must have disabled interrupts. 2066 * The caller must have disabled interrupts.
2009 */ 2067 */
2010 static void rcu_prepare_for_idle(int cpu) 2068 static void rcu_prepare_for_idle(int cpu)
2011 { 2069 {
2012 unsigned long flags; 2070 unsigned long flags;
2013 2071
2014 local_irq_save(flags); 2072 local_irq_save(flags);
2015 2073
2016 /* 2074 /*
2017 * If there are no callbacks on this CPU, enter dyntick-idle mode. 2075 * If there are no callbacks on this CPU, enter dyntick-idle mode.
2018 * Also reset state to avoid prejudicing later attempts. 2076 * Also reset state to avoid prejudicing later attempts.
2019 */ 2077 */
2020 if (!rcu_cpu_has_callbacks(cpu)) { 2078 if (!rcu_cpu_has_callbacks(cpu)) {
2021 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2079 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2022 per_cpu(rcu_dyntick_drain, cpu) = 0; 2080 per_cpu(rcu_dyntick_drain, cpu) = 0;
2023 local_irq_restore(flags); 2081 local_irq_restore(flags);
2024 trace_rcu_prep_idle("No callbacks"); 2082 trace_rcu_prep_idle("No callbacks");
2025 return; 2083 return;
2026 } 2084 }
2027 2085
2028 /* 2086 /*
2029 * If in holdoff mode, just return. We will presumably have 2087 * If in holdoff mode, just return. We will presumably have
2030 * refrained from disabling the scheduling-clock tick. 2088 * refrained from disabling the scheduling-clock tick.
2031 */ 2089 */
2032 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { 2090 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
2033 local_irq_restore(flags); 2091 local_irq_restore(flags);
2034 trace_rcu_prep_idle("In holdoff"); 2092 trace_rcu_prep_idle("In holdoff");
2035 return; 2093 return;
2036 } 2094 }
2037 2095
2038 /* Check and update the rcu_dyntick_drain sequencing. */ 2096 /* Check and update the rcu_dyntick_drain sequencing. */
2039 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2097 if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2040 /* First time through, initialize the counter. */ 2098 /* First time through, initialize the counter. */
2041 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; 2099 per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
2042 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2100 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2101 /* Can we go dyntick-idle despite still having callbacks? */
2102 if (!rcu_pending(cpu)) {
2103 trace_rcu_prep_idle("Dyntick with callbacks");
2104 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2105 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
2106 rcu_idle_gp_wait, HRTIMER_MODE_REL);
2107 return; /* Nothing more to do immediately. */
2108 }
2109
2043 /* We have hit the limit, so time to give up. */ 2110 /* We have hit the limit, so time to give up. */
2044 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2111 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2045 local_irq_restore(flags); 2112 local_irq_restore(flags);
2046 trace_rcu_prep_idle("Begin holdoff"); 2113 trace_rcu_prep_idle("Begin holdoff");
2047 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ 2114 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
2048 return; 2115 return;
2049 } 2116 }
2050 2117
2051 /* 2118 /*
2052 * Do one step of pushing the remaining RCU callbacks through 2119 * Do one step of pushing the remaining RCU callbacks through
2053 * the RCU core state machine. 2120 * the RCU core state machine.
2054 */ 2121 */
2055 #ifdef CONFIG_TREE_PREEMPT_RCU 2122 #ifdef CONFIG_TREE_PREEMPT_RCU
2056 if (per_cpu(rcu_preempt_data, cpu).nxtlist) { 2123 if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
2057 local_irq_restore(flags); 2124 local_irq_restore(flags);
2058 rcu_preempt_qs(cpu); 2125 rcu_preempt_qs(cpu);
2059 force_quiescent_state(&rcu_preempt_state, 0); 2126 force_quiescent_state(&rcu_preempt_state, 0);
2060 local_irq_save(flags); 2127 local_irq_save(flags);
2061 } 2128 }
2062 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 2129 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2063 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 2130 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
2064 local_irq_restore(flags); 2131 local_irq_restore(flags);
2065 rcu_sched_qs(cpu); 2132 rcu_sched_qs(cpu);
2066 force_quiescent_state(&rcu_sched_state, 0); 2133 force_quiescent_state(&rcu_sched_state, 0);
2067 local_irq_save(flags); 2134 local_irq_save(flags);
2068 } 2135 }
2069 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 2136 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
2070 local_irq_restore(flags); 2137 local_irq_restore(flags);
2071 rcu_bh_qs(cpu); 2138 rcu_bh_qs(cpu);
2072 force_quiescent_state(&rcu_bh_state, 0); 2139 force_quiescent_state(&rcu_bh_state, 0);
2073 local_irq_save(flags); 2140 local_irq_save(flags);
2074 } 2141 }
2075 2142
2076 /* 2143 /*
2077 * If RCU callbacks are still pending, RCU still needs this CPU. 2144 * If RCU callbacks are still pending, RCU still needs this CPU.
2078 * So try forcing the callbacks through the grace period. 2145 * So try forcing the callbacks through the grace period.
2079 */ 2146 */
2080 if (rcu_cpu_has_callbacks(cpu)) { 2147 if (rcu_cpu_has_callbacks(cpu)) {
2081 local_irq_restore(flags); 2148 local_irq_restore(flags);
2082 trace_rcu_prep_idle("More callbacks"); 2149 trace_rcu_prep_idle("More callbacks");
2083 invoke_rcu_core(); 2150 invoke_rcu_core();
2084 } else { 2151 } else {
2085 local_irq_restore(flags); 2152 local_irq_restore(flags);
2086 trace_rcu_prep_idle("Callbacks drained"); 2153 trace_rcu_prep_idle("Callbacks drained");
2087 } 2154 }
2088 } 2155 }
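Putting the pieces together, each idle-entry pass through rcu_prepare_for_idle() either resets state (no callbacks), bails out (holdoff), initializes or decrements rcu_dyntick_drain and pushes callbacks one more step, or, once the drain counter expires, either arms the idle timer and permits dyntick-idle with callbacks still queued (rcu_pending() == 0) or begins holdoff. The stand-alone model below reproduces only that sequencing; the quiescent-state pushing, hrtimer, and tracing are reduced to returned strings, and all names are illustrative.

/* Simplified user-space model of the decision sequence in
 * rcu_prepare_for_idle() above. */
#include <stdbool.h>
#include <stdio.h>

#define FLUSHES		5	/* plays the role of RCU_NEEDS_CPU_FLUSHES */

struct cpu_model {
	int drain;		/* rcu_dyntick_drain */
	unsigned long holdoff;	/* rcu_dyntick_holdoff */
	bool has_callbacks;	/* rcu_cpu_has_callbacks() */
	bool gp_needs_us;	/* rcu_pending() */
};

static const char *prepare_for_idle(struct cpu_model *c, unsigned long now)
{
	if (!c->has_callbacks) {
		c->holdoff = now - 1;		/* reset for later attempts */
		c->drain = 0;
		return "No callbacks";
	}
	if (c->holdoff == now)
		return "In holdoff";
	if (c->drain <= 0) {
		c->drain = FLUSHES;		/* first attempt this jiffy */
	} else if (--c->drain <= 0) {
		if (!c->gp_needs_us) {
			c->holdoff = now - 1;
			return "Dyntick with callbacks (timer armed)";
		}
		c->holdoff = now;		/* give up until the next jiffy */
		return "Begin holdoff";
	}
	return "Push callbacks through one more step";
}

int main(void)
{
	struct cpu_model c = { .has_callbacks = true, .gp_needs_us = false };
	int i;

	for (i = 0; i < FLUSHES + 1; i++)
		printf("pass %d: %s\n", i, prepare_for_idle(&c, 1000));
	return 0;
}

In this model, with gp_needs_us false the sixth pass (one initialization plus FLUSHES decrements) takes the "Dyntick with callbacks" path, which is the new behavior this commit introduces; with gp_needs_us true it would instead print "Begin holdoff", matching the pre-existing fallback.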
2089 2156
2090 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2157 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */