Commit 7cb92499000e3c86dae653077b1465458a039ef6
Committed by: Paul E. McKenney
1 parent: 3842a0832a
Exists in: master and in 6 other branches
rcu: Permit dyntick-idle with callbacks pending
The current implementation of RCU_FAST_NO_HZ prevents CPUs from entering dyntick-idle state if they have RCU callbacks pending. Unfortunately, this has the side-effect of often preventing them from entering this state, especially if at least one other CPU is not in dyntick-idle state. However, the resulting per-tick wakeup is wasteful in many cases: if the CPU has already fully responded to the current RCU grace period, there will be nothing for it to do until this grace period ends, which will frequently take several jiffies.

This commit therefore permits a CPU that has done everything that the current grace period has asked of it (rcu_pending() == 0) to enter dyntick-idle mode, even if it still has RCU callbacks pending. However, such a CPU posts a timer to wake it up several jiffies later (6 jiffies, based on experience with grace-period lengths). This wakeup is required to handle situations that can result in all CPUs being in dyntick-idle mode, thus failing to ever complete the current grace period. If a CPU wakes up before the timer goes off, then it cancels that timer, thus avoiding spurious wakeups.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
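For orientation, here is a minimal sketch of the mechanism the commit message describes: a CPU that still has callbacks queued but has done everything the current grace period asked of it is allowed into dyntick-idle after arming a wakeup timer a few jiffies out, and that timer is cancelled if the CPU wakes up earlier. This is an illustration only, not the patch itself (most of the real changes live in kernel/rcutree_plugin.h, which this excerpt does not show); the identifiers rcu_idle_gp_timer, RCU_IDLE_GP_DELAY, and the helper functions below are assumed for the sketch.

/*
 * Illustrative sketch only -- NOT the code added by this commit.  It assumes
 * the timer and per-CPU APIs of this kernel era; the names below are chosen
 * for the example rather than taken from the patch.
 */
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/percpu.h>

#define RCU_IDLE_GP_DELAY 6	/* Jiffies to wait: roughly one grace period. */

static DEFINE_PER_CPU(struct timer_list, rcu_idle_gp_timer);

/* The wakeup itself is the point; RCU core work then proceeds from the tick. */
static void rcu_idle_gp_timer_func(unsigned long unused)
{
}

/* One-time setup, for example from boot or CPU-online initialization. */
static void rcu_idle_gp_timer_init(int cpu)
{
	setup_timer(&per_cpu(rcu_idle_gp_timer, cpu),
		    rcu_idle_gp_timer_func, 0UL);
}

/*
 * Idle entry: this CPU still has callbacks queued but has done everything
 * the current grace period asked of it (rcu_pending() == 0), so permit
 * dyntick-idle and arm a wakeup so the grace period can still complete.
 */
static void rcu_arm_idle_gp_timer(int cpu)
{
	mod_timer(&per_cpu(rcu_idle_gp_timer, cpu),
		  jiffies + RCU_IDLE_GP_DELAY);
}

/* Idle exit before the timer fires: cancel it to avoid a spurious wakeup. */
static void rcu_cancel_idle_gp_timer(int cpu)
{
	del_timer(&per_cpu(rcu_idle_gp_timer, cpu));
}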
Showing 4 changed files with 78 additions and 5 deletions (inline diff).
include/trace/events/rcu.h
1 | #undef TRACE_SYSTEM | 1 | #undef TRACE_SYSTEM |
2 | #define TRACE_SYSTEM rcu | 2 | #define TRACE_SYSTEM rcu |
3 | 3 | ||
4 | #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) | 4 | #if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) |
5 | #define _TRACE_RCU_H | 5 | #define _TRACE_RCU_H |
6 | 6 | ||
7 | #include <linux/tracepoint.h> | 7 | #include <linux/tracepoint.h> |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Tracepoint for start/end markers used for utilization calculations. | 10 | * Tracepoint for start/end markers used for utilization calculations. |
11 | * By convention, the string is of the following forms: | 11 | * By convention, the string is of the following forms: |
12 | * | 12 | * |
13 | * "Start <activity>" -- Mark the start of the specified activity, | 13 | * "Start <activity>" -- Mark the start of the specified activity, |
14 | * such as "context switch". Nesting is permitted. | 14 | * such as "context switch". Nesting is permitted. |
15 | * "End <activity>" -- Mark the end of the specified activity. | 15 | * "End <activity>" -- Mark the end of the specified activity. |
16 | * | 16 | * |
17 | * An "@" character within "<activity>" is a comment character: Data | 17 | * An "@" character within "<activity>" is a comment character: Data |
18 | * reduction scripts will ignore the "@" and the remainder of the line. | 18 | * reduction scripts will ignore the "@" and the remainder of the line. |
19 | */ | 19 | */ |
20 | TRACE_EVENT(rcu_utilization, | 20 | TRACE_EVENT(rcu_utilization, |
21 | 21 | ||
22 | TP_PROTO(char *s), | 22 | TP_PROTO(char *s), |
23 | 23 | ||
24 | TP_ARGS(s), | 24 | TP_ARGS(s), |
25 | 25 | ||
26 | TP_STRUCT__entry( | 26 | TP_STRUCT__entry( |
27 | __field(char *, s) | 27 | __field(char *, s) |
28 | ), | 28 | ), |
29 | 29 | ||
30 | TP_fast_assign( | 30 | TP_fast_assign( |
31 | __entry->s = s; | 31 | __entry->s = s; |
32 | ), | 32 | ), |
33 | 33 | ||
34 | TP_printk("%s", __entry->s) | 34 | TP_printk("%s", __entry->s) |
35 | ); | 35 | ); |
36 | 36 | ||
37 | #ifdef CONFIG_RCU_TRACE | 37 | #ifdef CONFIG_RCU_TRACE |
38 | 38 | ||
39 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) | 39 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * Tracepoint for grace-period events: starting and ending a grace | 42 | * Tracepoint for grace-period events: starting and ending a grace |
43 | * period ("start" and "end", respectively), a CPU noting the start | 43 | * period ("start" and "end", respectively), a CPU noting the start |
44 | * of a new grace period or the end of an old grace period ("cpustart" | 44 | * of a new grace period or the end of an old grace period ("cpustart" |
45 | * and "cpuend", respectively), a CPU passing through a quiescent | 45 | * and "cpuend", respectively), a CPU passing through a quiescent |
46 | * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" | 46 | * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" |
47 | * and "cpuofl", respectively), and a CPU being kicked for being too | 47 | * and "cpuofl", respectively), and a CPU being kicked for being too |
48 | * long in dyntick-idle mode ("kick"). | 48 | * long in dyntick-idle mode ("kick"). |
49 | */ | 49 | */ |
50 | TRACE_EVENT(rcu_grace_period, | 50 | TRACE_EVENT(rcu_grace_period, |
51 | 51 | ||
52 | TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), | 52 | TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), |
53 | 53 | ||
54 | TP_ARGS(rcuname, gpnum, gpevent), | 54 | TP_ARGS(rcuname, gpnum, gpevent), |
55 | 55 | ||
56 | TP_STRUCT__entry( | 56 | TP_STRUCT__entry( |
57 | __field(char *, rcuname) | 57 | __field(char *, rcuname) |
58 | __field(unsigned long, gpnum) | 58 | __field(unsigned long, gpnum) |
59 | __field(char *, gpevent) | 59 | __field(char *, gpevent) |
60 | ), | 60 | ), |
61 | 61 | ||
62 | TP_fast_assign( | 62 | TP_fast_assign( |
63 | __entry->rcuname = rcuname; | 63 | __entry->rcuname = rcuname; |
64 | __entry->gpnum = gpnum; | 64 | __entry->gpnum = gpnum; |
65 | __entry->gpevent = gpevent; | 65 | __entry->gpevent = gpevent; |
66 | ), | 66 | ), |
67 | 67 | ||
68 | TP_printk("%s %lu %s", | 68 | TP_printk("%s %lu %s", |
69 | __entry->rcuname, __entry->gpnum, __entry->gpevent) | 69 | __entry->rcuname, __entry->gpnum, __entry->gpevent) |
70 | ); | 70 | ); |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * Tracepoint for grace-period-initialization events. These are | 73 | * Tracepoint for grace-period-initialization events. These are |
74 | * distinguished by the type of RCU, the new grace-period number, the | 74 | * distinguished by the type of RCU, the new grace-period number, the |
75 | * rcu_node structure level, the starting and ending CPU covered by the | 75 | * rcu_node structure level, the starting and ending CPU covered by the |
76 | * rcu_node structure, and the mask of CPUs that will be waited for. | 76 | * rcu_node structure, and the mask of CPUs that will be waited for. |
77 | * All but the type of RCU are extracted from the rcu_node structure. | 77 | * All but the type of RCU are extracted from the rcu_node structure. |
78 | */ | 78 | */ |
79 | TRACE_EVENT(rcu_grace_period_init, | 79 | TRACE_EVENT(rcu_grace_period_init, |
80 | 80 | ||
81 | TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, | 81 | TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, |
82 | int grplo, int grphi, unsigned long qsmask), | 82 | int grplo, int grphi, unsigned long qsmask), |
83 | 83 | ||
84 | TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), | 84 | TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), |
85 | 85 | ||
86 | TP_STRUCT__entry( | 86 | TP_STRUCT__entry( |
87 | __field(char *, rcuname) | 87 | __field(char *, rcuname) |
88 | __field(unsigned long, gpnum) | 88 | __field(unsigned long, gpnum) |
89 | __field(u8, level) | 89 | __field(u8, level) |
90 | __field(int, grplo) | 90 | __field(int, grplo) |
91 | __field(int, grphi) | 91 | __field(int, grphi) |
92 | __field(unsigned long, qsmask) | 92 | __field(unsigned long, qsmask) |
93 | ), | 93 | ), |
94 | 94 | ||
95 | TP_fast_assign( | 95 | TP_fast_assign( |
96 | __entry->rcuname = rcuname; | 96 | __entry->rcuname = rcuname; |
97 | __entry->gpnum = gpnum; | 97 | __entry->gpnum = gpnum; |
98 | __entry->level = level; | 98 | __entry->level = level; |
99 | __entry->grplo = grplo; | 99 | __entry->grplo = grplo; |
100 | __entry->grphi = grphi; | 100 | __entry->grphi = grphi; |
101 | __entry->qsmask = qsmask; | 101 | __entry->qsmask = qsmask; |
102 | ), | 102 | ), |
103 | 103 | ||
104 | TP_printk("%s %lu %u %d %d %lx", | 104 | TP_printk("%s %lu %u %d %d %lx", |
105 | __entry->rcuname, __entry->gpnum, __entry->level, | 105 | __entry->rcuname, __entry->gpnum, __entry->level, |
106 | __entry->grplo, __entry->grphi, __entry->qsmask) | 106 | __entry->grplo, __entry->grphi, __entry->qsmask) |
107 | ); | 107 | ); |
108 | 108 | ||
109 | /* | 109 | /* |
110 | * Tracepoint for tasks blocking within preemptible-RCU read-side | 110 | * Tracepoint for tasks blocking within preemptible-RCU read-side |
111 | * critical sections. Track the type of RCU (which one day might | 111 | * critical sections. Track the type of RCU (which one day might |
112 | * include SRCU), the grace-period number that the task is blocking | 112 | * include SRCU), the grace-period number that the task is blocking |
113 | * (the current or the next), and the task's PID. | 113 | * (the current or the next), and the task's PID. |
114 | */ | 114 | */ |
115 | TRACE_EVENT(rcu_preempt_task, | 115 | TRACE_EVENT(rcu_preempt_task, |
116 | 116 | ||
117 | TP_PROTO(char *rcuname, int pid, unsigned long gpnum), | 117 | TP_PROTO(char *rcuname, int pid, unsigned long gpnum), |
118 | 118 | ||
119 | TP_ARGS(rcuname, pid, gpnum), | 119 | TP_ARGS(rcuname, pid, gpnum), |
120 | 120 | ||
121 | TP_STRUCT__entry( | 121 | TP_STRUCT__entry( |
122 | __field(char *, rcuname) | 122 | __field(char *, rcuname) |
123 | __field(unsigned long, gpnum) | 123 | __field(unsigned long, gpnum) |
124 | __field(int, pid) | 124 | __field(int, pid) |
125 | ), | 125 | ), |
126 | 126 | ||
127 | TP_fast_assign( | 127 | TP_fast_assign( |
128 | __entry->rcuname = rcuname; | 128 | __entry->rcuname = rcuname; |
129 | __entry->gpnum = gpnum; | 129 | __entry->gpnum = gpnum; |
130 | __entry->pid = pid; | 130 | __entry->pid = pid; |
131 | ), | 131 | ), |
132 | 132 | ||
133 | TP_printk("%s %lu %d", | 133 | TP_printk("%s %lu %d", |
134 | __entry->rcuname, __entry->gpnum, __entry->pid) | 134 | __entry->rcuname, __entry->gpnum, __entry->pid) |
135 | ); | 135 | ); |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * Tracepoint for tasks that blocked within a given preemptible-RCU | 138 | * Tracepoint for tasks that blocked within a given preemptible-RCU |
139 | * read-side critical section exiting that critical section. Track the | 139 | * read-side critical section exiting that critical section. Track the |
140 | * type of RCU (which one day might include SRCU) and the task's PID. | 140 | * type of RCU (which one day might include SRCU) and the task's PID. |
141 | */ | 141 | */ |
142 | TRACE_EVENT(rcu_unlock_preempted_task, | 142 | TRACE_EVENT(rcu_unlock_preempted_task, |
143 | 143 | ||
144 | TP_PROTO(char *rcuname, unsigned long gpnum, int pid), | 144 | TP_PROTO(char *rcuname, unsigned long gpnum, int pid), |
145 | 145 | ||
146 | TP_ARGS(rcuname, gpnum, pid), | 146 | TP_ARGS(rcuname, gpnum, pid), |
147 | 147 | ||
148 | TP_STRUCT__entry( | 148 | TP_STRUCT__entry( |
149 | __field(char *, rcuname) | 149 | __field(char *, rcuname) |
150 | __field(unsigned long, gpnum) | 150 | __field(unsigned long, gpnum) |
151 | __field(int, pid) | 151 | __field(int, pid) |
152 | ), | 152 | ), |
153 | 153 | ||
154 | TP_fast_assign( | 154 | TP_fast_assign( |
155 | __entry->rcuname = rcuname; | 155 | __entry->rcuname = rcuname; |
156 | __entry->gpnum = gpnum; | 156 | __entry->gpnum = gpnum; |
157 | __entry->pid = pid; | 157 | __entry->pid = pid; |
158 | ), | 158 | ), |
159 | 159 | ||
160 | TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) | 160 | TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) |
161 | ); | 161 | ); |
162 | 162 | ||
163 | /* | 163 | /* |
164 | * Tracepoint for quiescent-state-reporting events. These are | 164 | * Tracepoint for quiescent-state-reporting events. These are |
165 | * distinguished by the type of RCU, the grace-period number, the | 165 | * distinguished by the type of RCU, the grace-period number, the |
166 | * mask of quiescent lower-level entities, the rcu_node structure level, | 166 | * mask of quiescent lower-level entities, the rcu_node structure level, |
167 | * the starting and ending CPU covered by the rcu_node structure, and | 167 | * the starting and ending CPU covered by the rcu_node structure, and |
168 | * whether there are any blocked tasks blocking the current grace period. | 168 | * whether there are any blocked tasks blocking the current grace period. |
169 | * All but the type of RCU are extracted from the rcu_node structure. | 169 | * All but the type of RCU are extracted from the rcu_node structure. |
170 | */ | 170 | */ |
171 | TRACE_EVENT(rcu_quiescent_state_report, | 171 | TRACE_EVENT(rcu_quiescent_state_report, |
172 | 172 | ||
173 | TP_PROTO(char *rcuname, unsigned long gpnum, | 173 | TP_PROTO(char *rcuname, unsigned long gpnum, |
174 | unsigned long mask, unsigned long qsmask, | 174 | unsigned long mask, unsigned long qsmask, |
175 | u8 level, int grplo, int grphi, int gp_tasks), | 175 | u8 level, int grplo, int grphi, int gp_tasks), |
176 | 176 | ||
177 | TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), | 177 | TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), |
178 | 178 | ||
179 | TP_STRUCT__entry( | 179 | TP_STRUCT__entry( |
180 | __field(char *, rcuname) | 180 | __field(char *, rcuname) |
181 | __field(unsigned long, gpnum) | 181 | __field(unsigned long, gpnum) |
182 | __field(unsigned long, mask) | 182 | __field(unsigned long, mask) |
183 | __field(unsigned long, qsmask) | 183 | __field(unsigned long, qsmask) |
184 | __field(u8, level) | 184 | __field(u8, level) |
185 | __field(int, grplo) | 185 | __field(int, grplo) |
186 | __field(int, grphi) | 186 | __field(int, grphi) |
187 | __field(u8, gp_tasks) | 187 | __field(u8, gp_tasks) |
188 | ), | 188 | ), |
189 | 189 | ||
190 | TP_fast_assign( | 190 | TP_fast_assign( |
191 | __entry->rcuname = rcuname; | 191 | __entry->rcuname = rcuname; |
192 | __entry->gpnum = gpnum; | 192 | __entry->gpnum = gpnum; |
193 | __entry->mask = mask; | 193 | __entry->mask = mask; |
194 | __entry->qsmask = qsmask; | 194 | __entry->qsmask = qsmask; |
195 | __entry->level = level; | 195 | __entry->level = level; |
196 | __entry->grplo = grplo; | 196 | __entry->grplo = grplo; |
197 | __entry->grphi = grphi; | 197 | __entry->grphi = grphi; |
198 | __entry->gp_tasks = gp_tasks; | 198 | __entry->gp_tasks = gp_tasks; |
199 | ), | 199 | ), |
200 | 200 | ||
201 | TP_printk("%s %lu %lx>%lx %u %d %d %u", | 201 | TP_printk("%s %lu %lx>%lx %u %d %d %u", |
202 | __entry->rcuname, __entry->gpnum, | 202 | __entry->rcuname, __entry->gpnum, |
203 | __entry->mask, __entry->qsmask, __entry->level, | 203 | __entry->mask, __entry->qsmask, __entry->level, |
204 | __entry->grplo, __entry->grphi, __entry->gp_tasks) | 204 | __entry->grplo, __entry->grphi, __entry->gp_tasks) |
205 | ); | 205 | ); |
206 | 206 | ||
207 | /* | 207 | /* |
208 | * Tracepoint for quiescent states detected by force_quiescent_state(). | 208 | * Tracepoint for quiescent states detected by force_quiescent_state(). |
209 | * These trace events include the type of RCU, the grace-period number | 209 | * These trace events include the type of RCU, the grace-period number |
210 | * that was blocked by the CPU, the CPU itself, and the type of quiescent | 210 | * that was blocked by the CPU, the CPU itself, and the type of quiescent |
211 | * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, | 211 | * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, |
212 | * or "kick" when kicking a CPU that has been in dyntick-idle mode for | 212 | * or "kick" when kicking a CPU that has been in dyntick-idle mode for |
213 | * too long. | 213 | * too long. |
214 | */ | 214 | */ |
215 | TRACE_EVENT(rcu_fqs, | 215 | TRACE_EVENT(rcu_fqs, |
216 | 216 | ||
217 | TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), | 217 | TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), |
218 | 218 | ||
219 | TP_ARGS(rcuname, gpnum, cpu, qsevent), | 219 | TP_ARGS(rcuname, gpnum, cpu, qsevent), |
220 | 220 | ||
221 | TP_STRUCT__entry( | 221 | TP_STRUCT__entry( |
222 | __field(char *, rcuname) | 222 | __field(char *, rcuname) |
223 | __field(unsigned long, gpnum) | 223 | __field(unsigned long, gpnum) |
224 | __field(int, cpu) | 224 | __field(int, cpu) |
225 | __field(char *, qsevent) | 225 | __field(char *, qsevent) |
226 | ), | 226 | ), |
227 | 227 | ||
228 | TP_fast_assign( | 228 | TP_fast_assign( |
229 | __entry->rcuname = rcuname; | 229 | __entry->rcuname = rcuname; |
230 | __entry->gpnum = gpnum; | 230 | __entry->gpnum = gpnum; |
231 | __entry->cpu = cpu; | 231 | __entry->cpu = cpu; |
232 | __entry->qsevent = qsevent; | 232 | __entry->qsevent = qsevent; |
233 | ), | 233 | ), |
234 | 234 | ||
235 | TP_printk("%s %lu %d %s", | 235 | TP_printk("%s %lu %d %s", |
236 | __entry->rcuname, __entry->gpnum, | 236 | __entry->rcuname, __entry->gpnum, |
237 | __entry->cpu, __entry->qsevent) | 237 | __entry->cpu, __entry->qsevent) |
238 | ); | 238 | ); |
239 | 239 | ||
240 | #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 240 | #endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ |
241 | 241 | ||
242 | /* | 242 | /* |
243 | * Tracepoint for dyntick-idle entry/exit events. These take a string | 243 | * Tracepoint for dyntick-idle entry/exit events. These take a string |
244 | * as argument: "Start" for entering dyntick-idle mode, "End" for | 244 | * as argument: "Start" for entering dyntick-idle mode, "End" for |
245 | * leaving it, "--=" for events moving towards idle, and "++=" for events | 245 | * leaving it, "--=" for events moving towards idle, and "++=" for events |
246 | * moving away from idle. "Error on entry: not idle task" and "Error on | 246 | * moving away from idle. "Error on entry: not idle task" and "Error on |
247 | * exit: not idle task" indicate that a non-idle task is erroneously | 247 | * exit: not idle task" indicate that a non-idle task is erroneously |
248 | * toying with the idle loop. | 248 | * toying with the idle loop. |
249 | * | 249 | * |
250 | * These events also take a pair of numbers, which indicate the nesting | 250 | * These events also take a pair of numbers, which indicate the nesting |
251 | * depth before and after the event of interest. Note that task-related | 251 | * depth before and after the event of interest. Note that task-related |
252 | * events use the upper bits of each number, while interrupt-related | 252 | * events use the upper bits of each number, while interrupt-related |
253 | * events use the lower bits. | 253 | * events use the lower bits. |
254 | */ | 254 | */ |
255 | TRACE_EVENT(rcu_dyntick, | 255 | TRACE_EVENT(rcu_dyntick, |
256 | 256 | ||
257 | TP_PROTO(char *polarity, long long oldnesting, long long newnesting), | 257 | TP_PROTO(char *polarity, long long oldnesting, long long newnesting), |
258 | 258 | ||
259 | TP_ARGS(polarity, oldnesting, newnesting), | 259 | TP_ARGS(polarity, oldnesting, newnesting), |
260 | 260 | ||
261 | TP_STRUCT__entry( | 261 | TP_STRUCT__entry( |
262 | __field(char *, polarity) | 262 | __field(char *, polarity) |
263 | __field(long long, oldnesting) | 263 | __field(long long, oldnesting) |
264 | __field(long long, newnesting) | 264 | __field(long long, newnesting) |
265 | ), | 265 | ), |
266 | 266 | ||
267 | TP_fast_assign( | 267 | TP_fast_assign( |
268 | __entry->polarity = polarity; | 268 | __entry->polarity = polarity; |
269 | __entry->oldnesting = oldnesting; | 269 | __entry->oldnesting = oldnesting; |
270 | __entry->newnesting = newnesting; | 270 | __entry->newnesting = newnesting; |
271 | ), | 271 | ), |
272 | 272 | ||
273 | TP_printk("%s %llx %llx", __entry->polarity, | 273 | TP_printk("%s %llx %llx", __entry->polarity, |
274 | __entry->oldnesting, __entry->newnesting) | 274 | __entry->oldnesting, __entry->newnesting) |
275 | ); | 275 | ); |
276 | 276 | ||
277 | /* | 277 | /* |
278 | * Tracepoint for RCU preparation for idle, the goal being to get RCU | 278 | * Tracepoint for RCU preparation for idle, the goal being to get RCU |
279 | * processing done so that the current CPU can shut off its scheduling | 279 | * processing done so that the current CPU can shut off its scheduling |
280 | * clock and enter dyntick-idle mode. One way to accomplish this is | 280 | * clock and enter dyntick-idle mode. One way to accomplish this is |
281 | * to drain all RCU callbacks from this CPU, and the other is to have | 281 | * to drain all RCU callbacks from this CPU, and the other is to have |
282 | * done everything RCU requires for the current grace period. In this | 282 | * done everything RCU requires for the current grace period. In this |
283 | * latter case, the CPU will be awakened at the end of the current grace | 283 | * latter case, the CPU will be awakened at the end of the current grace |
284 | * period in order to process the remainder of its callbacks. | 284 | * period in order to process the remainder of its callbacks. |
285 | * | 285 | * |
286 | * These tracepoints take a string as argument: | 286 | * These tracepoints take a string as argument: |
287 | * | 287 | * |
288 | * "No callbacks": Nothing to do, no callbacks on this CPU. | 288 | * "No callbacks": Nothing to do, no callbacks on this CPU. |
289 | * "In holdoff": Nothing to do, holding off after unsuccessful attempt. | 289 | * "In holdoff": Nothing to do, holding off after unsuccessful attempt. |
290 | * "Begin holdoff": Attempt failed, don't retry until next jiffy. | 290 | * "Begin holdoff": Attempt failed, don't retry until next jiffy. |
| | 291 | * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. |
291 | * "More callbacks": Still more callbacks, try again to clear them out. | 292 | * "More callbacks": Still more callbacks, try again to clear them out. |
292 | * "Callbacks drained": All callbacks processed, off to dyntick idle! | 293 | * "Callbacks drained": All callbacks processed, off to dyntick idle! |
293 | * "CPU awakened at GP end": | 294 | * "Timer": Timer fired to cause CPU to continue processing callbacks. |
294 | */ | 295 | */ |
295 | TRACE_EVENT(rcu_prep_idle, | 296 | TRACE_EVENT(rcu_prep_idle, |
296 | 297 | ||
297 | TP_PROTO(char *reason), | 298 | TP_PROTO(char *reason), |
298 | 299 | ||
299 | TP_ARGS(reason), | 300 | TP_ARGS(reason), |
300 | 301 | ||
301 | TP_STRUCT__entry( | 302 | TP_STRUCT__entry( |
302 | __field(char *, reason) | 303 | __field(char *, reason) |
303 | ), | 304 | ), |
304 | 305 | ||
305 | TP_fast_assign( | 306 | TP_fast_assign( |
306 | __entry->reason = reason; | 307 | __entry->reason = reason; |
307 | ), | 308 | ), |
308 | 309 | ||
309 | TP_printk("%s", __entry->reason) | 310 | TP_printk("%s", __entry->reason) |
310 | ); | 311 | ); |
311 | 312 | ||
312 | /* | 313 | /* |
313 | * Tracepoint for the registration of a single RCU callback function. | 314 | * Tracepoint for the registration of a single RCU callback function. |
314 | * The first argument is the type of RCU, the second argument is | 315 | * The first argument is the type of RCU, the second argument is |
315 | * a pointer to the RCU callback itself, and the third element is the | 316 | * a pointer to the RCU callback itself, and the third element is the |
316 | * new RCU callback queue length for the current CPU. | 317 | * new RCU callback queue length for the current CPU. |
317 | */ | 318 | */ |
318 | TRACE_EVENT(rcu_callback, | 319 | TRACE_EVENT(rcu_callback, |
319 | 320 | ||
320 | TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), | 321 | TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), |
321 | 322 | ||
322 | TP_ARGS(rcuname, rhp, qlen), | 323 | TP_ARGS(rcuname, rhp, qlen), |
323 | 324 | ||
324 | TP_STRUCT__entry( | 325 | TP_STRUCT__entry( |
325 | __field(char *, rcuname) | 326 | __field(char *, rcuname) |
326 | __field(void *, rhp) | 327 | __field(void *, rhp) |
327 | __field(void *, func) | 328 | __field(void *, func) |
328 | __field(long, qlen) | 329 | __field(long, qlen) |
329 | ), | 330 | ), |
330 | 331 | ||
331 | TP_fast_assign( | 332 | TP_fast_assign( |
332 | __entry->rcuname = rcuname; | 333 | __entry->rcuname = rcuname; |
333 | __entry->rhp = rhp; | 334 | __entry->rhp = rhp; |
334 | __entry->func = rhp->func; | 335 | __entry->func = rhp->func; |
335 | __entry->qlen = qlen; | 336 | __entry->qlen = qlen; |
336 | ), | 337 | ), |
337 | 338 | ||
338 | TP_printk("%s rhp=%p func=%pf %ld", | 339 | TP_printk("%s rhp=%p func=%pf %ld", |
339 | __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) | 340 | __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) |
340 | ); | 341 | ); |
341 | 342 | ||
342 | /* | 343 | /* |
343 | * Tracepoint for the registration of a single RCU callback of the special | 344 | * Tracepoint for the registration of a single RCU callback of the special |
344 | * kfree() form. The first argument is the RCU type, the second argument | 345 | * kfree() form. The first argument is the RCU type, the second argument |
345 | * is a pointer to the RCU callback, the third argument is the offset | 346 | * is a pointer to the RCU callback, the third argument is the offset |
346 | * of the callback within the enclosing RCU-protected data structure, | 347 | * of the callback within the enclosing RCU-protected data structure, |
347 | * and the fourth argument is the new RCU callback queue length for the | 348 | * and the fourth argument is the new RCU callback queue length for the |
348 | * current CPU. | 349 | * current CPU. |
349 | */ | 350 | */ |
350 | TRACE_EVENT(rcu_kfree_callback, | 351 | TRACE_EVENT(rcu_kfree_callback, |
351 | 352 | ||
352 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, | 353 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, |
353 | long qlen), | 354 | long qlen), |
354 | 355 | ||
355 | TP_ARGS(rcuname, rhp, offset, qlen), | 356 | TP_ARGS(rcuname, rhp, offset, qlen), |
356 | 357 | ||
357 | TP_STRUCT__entry( | 358 | TP_STRUCT__entry( |
358 | __field(char *, rcuname) | 359 | __field(char *, rcuname) |
359 | __field(void *, rhp) | 360 | __field(void *, rhp) |
360 | __field(unsigned long, offset) | 361 | __field(unsigned long, offset) |
361 | __field(long, qlen) | 362 | __field(long, qlen) |
362 | ), | 363 | ), |
363 | 364 | ||
364 | TP_fast_assign( | 365 | TP_fast_assign( |
365 | __entry->rcuname = rcuname; | 366 | __entry->rcuname = rcuname; |
366 | __entry->rhp = rhp; | 367 | __entry->rhp = rhp; |
367 | __entry->offset = offset; | 368 | __entry->offset = offset; |
368 | __entry->qlen = qlen; | 369 | __entry->qlen = qlen; |
369 | ), | 370 | ), |
370 | 371 | ||
371 | TP_printk("%s rhp=%p func=%ld %ld", | 372 | TP_printk("%s rhp=%p func=%ld %ld", |
372 | __entry->rcuname, __entry->rhp, __entry->offset, | 373 | __entry->rcuname, __entry->rhp, __entry->offset, |
373 | __entry->qlen) | 374 | __entry->qlen) |
374 | ); | 375 | ); |
375 | 376 | ||
376 | /* | 377 | /* |
377 | * Tracepoint for marking the beginning rcu_do_batch, performed to start | 378 | * Tracepoint for marking the beginning rcu_do_batch, performed to start |
378 | * RCU callback invocation. The first argument is the RCU flavor, | 379 | * RCU callback invocation. The first argument is the RCU flavor, |
379 | * the second is the total number of callbacks (including those that | 380 | * the second is the total number of callbacks (including those that |
380 | * are not yet ready to be invoked), and the third argument is the | 381 | * are not yet ready to be invoked), and the third argument is the |
381 | * current RCU-callback batch limit. | 382 | * current RCU-callback batch limit. |
382 | */ | 383 | */ |
383 | TRACE_EVENT(rcu_batch_start, | 384 | TRACE_EVENT(rcu_batch_start, |
384 | 385 | ||
385 | TP_PROTO(char *rcuname, long qlen, int blimit), | 386 | TP_PROTO(char *rcuname, long qlen, int blimit), |
386 | 387 | ||
387 | TP_ARGS(rcuname, qlen, blimit), | 388 | TP_ARGS(rcuname, qlen, blimit), |
388 | 389 | ||
389 | TP_STRUCT__entry( | 390 | TP_STRUCT__entry( |
390 | __field(char *, rcuname) | 391 | __field(char *, rcuname) |
391 | __field(long, qlen) | 392 | __field(long, qlen) |
392 | __field(int, blimit) | 393 | __field(int, blimit) |
393 | ), | 394 | ), |
394 | 395 | ||
395 | TP_fast_assign( | 396 | TP_fast_assign( |
396 | __entry->rcuname = rcuname; | 397 | __entry->rcuname = rcuname; |
397 | __entry->qlen = qlen; | 398 | __entry->qlen = qlen; |
398 | __entry->blimit = blimit; | 399 | __entry->blimit = blimit; |
399 | ), | 400 | ), |
400 | 401 | ||
401 | TP_printk("%s CBs=%ld bl=%d", | 402 | TP_printk("%s CBs=%ld bl=%d", |
402 | __entry->rcuname, __entry->qlen, __entry->blimit) | 403 | __entry->rcuname, __entry->qlen, __entry->blimit) |
403 | ); | 404 | ); |
404 | 405 | ||
405 | /* | 406 | /* |
406 | * Tracepoint for the invocation of a single RCU callback function. | 407 | * Tracepoint for the invocation of a single RCU callback function. |
407 | * The first argument is the type of RCU, and the second argument is | 408 | * The first argument is the type of RCU, and the second argument is |
408 | * a pointer to the RCU callback itself. | 409 | * a pointer to the RCU callback itself. |
409 | */ | 410 | */ |
410 | TRACE_EVENT(rcu_invoke_callback, | 411 | TRACE_EVENT(rcu_invoke_callback, |
411 | 412 | ||
412 | TP_PROTO(char *rcuname, struct rcu_head *rhp), | 413 | TP_PROTO(char *rcuname, struct rcu_head *rhp), |
413 | 414 | ||
414 | TP_ARGS(rcuname, rhp), | 415 | TP_ARGS(rcuname, rhp), |
415 | 416 | ||
416 | TP_STRUCT__entry( | 417 | TP_STRUCT__entry( |
417 | __field(char *, rcuname) | 418 | __field(char *, rcuname) |
418 | __field(void *, rhp) | 419 | __field(void *, rhp) |
419 | __field(void *, func) | 420 | __field(void *, func) |
420 | ), | 421 | ), |
421 | 422 | ||
422 | TP_fast_assign( | 423 | TP_fast_assign( |
423 | __entry->rcuname = rcuname; | 424 | __entry->rcuname = rcuname; |
424 | __entry->rhp = rhp; | 425 | __entry->rhp = rhp; |
425 | __entry->func = rhp->func; | 426 | __entry->func = rhp->func; |
426 | ), | 427 | ), |
427 | 428 | ||
428 | TP_printk("%s rhp=%p func=%pf", | 429 | TP_printk("%s rhp=%p func=%pf", |
429 | __entry->rcuname, __entry->rhp, __entry->func) | 430 | __entry->rcuname, __entry->rhp, __entry->func) |
430 | ); | 431 | ); |
431 | 432 | ||
432 | /* | 433 | /* |
433 | * Tracepoint for the invocation of a single RCU callback of the special | 434 | * Tracepoint for the invocation of a single RCU callback of the special |
434 | * kfree() form. The first argument is the RCU flavor, the second | 435 | * kfree() form. The first argument is the RCU flavor, the second |
435 | * argument is a pointer to the RCU callback, and the third argument | 436 | * argument is a pointer to the RCU callback, and the third argument |
436 | * is the offset of the callback within the enclosing RCU-protected | 437 | * is the offset of the callback within the enclosing RCU-protected |
437 | * data structure. | 438 | * data structure. |
438 | */ | 439 | */ |
439 | TRACE_EVENT(rcu_invoke_kfree_callback, | 440 | TRACE_EVENT(rcu_invoke_kfree_callback, |
440 | 441 | ||
441 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), | 442 | TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), |
442 | 443 | ||
443 | TP_ARGS(rcuname, rhp, offset), | 444 | TP_ARGS(rcuname, rhp, offset), |
444 | 445 | ||
445 | TP_STRUCT__entry( | 446 | TP_STRUCT__entry( |
446 | __field(char *, rcuname) | 447 | __field(char *, rcuname) |
447 | __field(void *, rhp) | 448 | __field(void *, rhp) |
448 | __field(unsigned long, offset) | 449 | __field(unsigned long, offset) |
449 | ), | 450 | ), |
450 | 451 | ||
451 | TP_fast_assign( | 452 | TP_fast_assign( |
452 | __entry->rcuname = rcuname; | 453 | __entry->rcuname = rcuname; |
453 | __entry->rhp = rhp; | 454 | __entry->rhp = rhp; |
454 | __entry->offset = offset; | 455 | __entry->offset = offset; |
455 | ), | 456 | ), |
456 | 457 | ||
457 | TP_printk("%s rhp=%p func=%ld", | 458 | TP_printk("%s rhp=%p func=%ld", |
458 | __entry->rcuname, __entry->rhp, __entry->offset) | 459 | __entry->rcuname, __entry->rhp, __entry->offset) |
459 | ); | 460 | ); |
460 | 461 | ||
461 | /* | 462 | /* |
462 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been | 463 | * Tracepoint for exiting rcu_do_batch after RCU callbacks have been |
463 | * invoked. The first argument is the name of the RCU flavor and | 464 | * invoked. The first argument is the name of the RCU flavor and |
464 | * the second argument is number of callbacks actually invoked. | 465 | * the second argument is number of callbacks actually invoked. |
465 | */ | 466 | */ |
466 | TRACE_EVENT(rcu_batch_end, | 467 | TRACE_EVENT(rcu_batch_end, |
467 | 468 | ||
468 | TP_PROTO(char *rcuname, int callbacks_invoked), | 469 | TP_PROTO(char *rcuname, int callbacks_invoked), |
469 | 470 | ||
470 | TP_ARGS(rcuname, callbacks_invoked), | 471 | TP_ARGS(rcuname, callbacks_invoked), |
471 | 472 | ||
472 | TP_STRUCT__entry( | 473 | TP_STRUCT__entry( |
473 | __field(char *, rcuname) | 474 | __field(char *, rcuname) |
474 | __field(int, callbacks_invoked) | 475 | __field(int, callbacks_invoked) |
475 | ), | 476 | ), |
476 | 477 | ||
477 | TP_fast_assign( | 478 | TP_fast_assign( |
478 | __entry->rcuname = rcuname; | 479 | __entry->rcuname = rcuname; |
479 | __entry->callbacks_invoked = callbacks_invoked; | 480 | __entry->callbacks_invoked = callbacks_invoked; |
480 | ), | 481 | ), |
481 | 482 | ||
482 | TP_printk("%s CBs-invoked=%d", | 483 | TP_printk("%s CBs-invoked=%d", |
483 | __entry->rcuname, __entry->callbacks_invoked) | 484 | __entry->rcuname, __entry->callbacks_invoked) |
484 | ); | 485 | ); |
485 | 486 | ||
486 | /* | 487 | /* |
487 | * Tracepoint for rcutorture readers. The first argument is the name | 488 | * Tracepoint for rcutorture readers. The first argument is the name |
488 | * of the RCU flavor from rcutorture's viewpoint and the second argument | 489 | * of the RCU flavor from rcutorture's viewpoint and the second argument |
489 | * is the callback address. | 490 | * is the callback address. |
490 | */ | 491 | */ |
491 | TRACE_EVENT(rcu_torture_read, | 492 | TRACE_EVENT(rcu_torture_read, |
492 | 493 | ||
493 | TP_PROTO(char *rcutorturename, struct rcu_head *rhp), | 494 | TP_PROTO(char *rcutorturename, struct rcu_head *rhp), |
494 | 495 | ||
495 | TP_ARGS(rcutorturename, rhp), | 496 | TP_ARGS(rcutorturename, rhp), |
496 | 497 | ||
497 | TP_STRUCT__entry( | 498 | TP_STRUCT__entry( |
498 | __field(char *, rcutorturename) | 499 | __field(char *, rcutorturename) |
499 | __field(struct rcu_head *, rhp) | 500 | __field(struct rcu_head *, rhp) |
500 | ), | 501 | ), |
501 | 502 | ||
502 | TP_fast_assign( | 503 | TP_fast_assign( |
503 | __entry->rcutorturename = rcutorturename; | 504 | __entry->rcutorturename = rcutorturename; |
504 | __entry->rhp = rhp; | 505 | __entry->rhp = rhp; |
505 | ), | 506 | ), |
506 | 507 | ||
507 | TP_printk("%s torture read %p", | 508 | TP_printk("%s torture read %p", |
508 | __entry->rcutorturename, __entry->rhp) | 509 | __entry->rcutorturename, __entry->rhp) |
509 | ); | 510 | ); |
510 | 511 | ||
511 | #else /* #ifdef CONFIG_RCU_TRACE */ | 512 | #else /* #ifdef CONFIG_RCU_TRACE */ |
512 | 513 | ||
513 | #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) | 514 | #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) |
514 | #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) | 515 | #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) |
515 | #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) | 516 | #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) |
516 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) | 517 | #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) |
517 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) | 518 | #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) |
518 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) | 519 | #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) |
519 | #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) | 520 | #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) |
520 | #define trace_rcu_prep_idle(reason) do { } while (0) | 521 | #define trace_rcu_prep_idle(reason) do { } while (0) |
521 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) | 522 | #define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) |
522 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) | 523 | #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) |
523 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) | 524 | #define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) |
524 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) | 525 | #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) |
525 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) | 526 | #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) |
526 | #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) | 527 | #define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) |
527 | #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) | 528 | #define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) |
528 | 529 | ||
529 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 530 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
530 | 531 | ||
531 | #endif /* _TRACE_RCU_H */ | 532 | #endif /* _TRACE_RCU_H */ |
532 | 533 | ||
533 | /* This part must be outside protection */ | 534 | /* This part must be outside protection */ |
534 | #include <trace/define_trace.h> | 535 | #include <trace/define_trace.h> |
535 | 536 |
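The net effect of the hunk above is two new reason strings for the existing rcu_prep_idle tracepoint: "Dyntick with callbacks" and "Timer". The real call sites are in kernel/rcutree_plugin.h, which is not part of this excerpt; the fragment below is only a hypothetical illustration of how those strings would be emitted.

/* Hypothetical call sites for the two new rcu_prep_idle reason strings. */
#include <trace/events/rcu.h>

/* Entering dyntick-idle even though callbacks remain queued on this CPU. */
static void example_enter_dyntick_with_callbacks(void)
{
	trace_rcu_prep_idle("Dyntick with callbacks");
}

/* Wakeup timer fired, so the CPU resumes processing its remaining callbacks. */
static void example_idle_gp_timer_fired(void)
{
	trace_rcu_prep_idle("Timer");
}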
kernel/rcutree.c
1 | /* | 1 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion | 2 | * Read-Copy Update mechanism for mutual exclusion |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or modify | 4 | * This program is free software; you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by | 5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation; either version 2 of the License, or | 6 | * the Free Software Foundation; either version 2 of the License, or |
7 | * (at your option) any later version. | 7 | * (at your option) any later version. |
8 | * | 8 | * |
9 | * This program is distributed in the hope that it will be useful, | 9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. | 12 | * GNU General Public License for more details. |
13 | * | 13 | * |
14 | * You should have received a copy of the GNU General Public License | 14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 | * | 17 | * |
18 | * Copyright IBM Corporation, 2008 | 18 | * Copyright IBM Corporation, 2008 |
19 | * | 19 | * |
20 | * Authors: Dipankar Sarma <dipankar@in.ibm.com> | 20 | * Authors: Dipankar Sarma <dipankar@in.ibm.com> |
21 | * Manfred Spraul <manfred@colorfullife.com> | 21 | * Manfred Spraul <manfred@colorfullife.com> |
22 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version | 22 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version |
23 | * | 23 | * |
24 | * Based on the original work by Paul McKenney <paulmck@us.ibm.com> | 24 | * Based on the original work by Paul McKenney <paulmck@us.ibm.com> |
25 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. | 25 | * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. |
26 | * | 26 | * |
27 | * For detailed explanation of Read-Copy Update mechanism see - | 27 | * For detailed explanation of Read-Copy Update mechanism see - |
28 | * Documentation/RCU | 28 | * Documentation/RCU |
29 | */ | 29 | */ |
30 | #include <linux/types.h> | 30 | #include <linux/types.h> |
31 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
32 | #include <linux/init.h> | 32 | #include <linux/init.h> |
33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/rcupdate.h> | 35 | #include <linux/rcupdate.h> |
36 | #include <linux/interrupt.h> | 36 | #include <linux/interrupt.h> |
37 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
38 | #include <linux/nmi.h> | 38 | #include <linux/nmi.h> |
39 | #include <linux/atomic.h> | 39 | #include <linux/atomic.h> |
40 | #include <linux/bitops.h> | 40 | #include <linux/bitops.h> |
41 | #include <linux/export.h> | 41 | #include <linux/export.h> |
42 | #include <linux/completion.h> | 42 | #include <linux/completion.h> |
43 | #include <linux/moduleparam.h> | 43 | #include <linux/moduleparam.h> |
44 | #include <linux/percpu.h> | 44 | #include <linux/percpu.h> |
45 | #include <linux/notifier.h> | 45 | #include <linux/notifier.h> |
46 | #include <linux/cpu.h> | 46 | #include <linux/cpu.h> |
47 | #include <linux/mutex.h> | 47 | #include <linux/mutex.h> |
48 | #include <linux/time.h> | 48 | #include <linux/time.h> |
49 | #include <linux/kernel_stat.h> | 49 | #include <linux/kernel_stat.h> |
50 | #include <linux/wait.h> | 50 | #include <linux/wait.h> |
51 | #include <linux/kthread.h> | 51 | #include <linux/kthread.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | 53 | ||
54 | #include "rcutree.h" | 54 | #include "rcutree.h" |
55 | #include <trace/events/rcu.h> | 55 | #include <trace/events/rcu.h> |
56 | 56 | ||
57 | #include "rcu.h" | 57 | #include "rcu.h" |
58 | 58 | ||
59 | /* Data structures. */ | 59 | /* Data structures. */ |
60 | 60 | ||
61 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 61 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; |
62 | 62 | ||
63 | #define RCU_STATE_INITIALIZER(structname) { \ | 63 | #define RCU_STATE_INITIALIZER(structname) { \ |
64 | .level = { &structname##_state.node[0] }, \ | 64 | .level = { &structname##_state.node[0] }, \ |
65 | .levelcnt = { \ | 65 | .levelcnt = { \ |
66 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | 66 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ |
67 | NUM_RCU_LVL_1, \ | 67 | NUM_RCU_LVL_1, \ |
68 | NUM_RCU_LVL_2, \ | 68 | NUM_RCU_LVL_2, \ |
69 | NUM_RCU_LVL_3, \ | 69 | NUM_RCU_LVL_3, \ |
70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ | 70 | NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ |
71 | }, \ | 71 | }, \ |
72 | .fqs_state = RCU_GP_IDLE, \ | 72 | .fqs_state = RCU_GP_IDLE, \ |
73 | .gpnum = -300, \ | 73 | .gpnum = -300, \ |
74 | .completed = -300, \ | 74 | .completed = -300, \ |
75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ | 75 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ |
76 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ | 76 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ |
77 | .n_force_qs = 0, \ | 77 | .n_force_qs = 0, \ |
78 | .n_force_qs_ngp = 0, \ | 78 | .n_force_qs_ngp = 0, \ |
79 | .name = #structname, \ | 79 | .name = #structname, \ |
80 | } | 80 | } |
81 | 81 | ||
82 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); | 82 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); |
83 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); |
84 | 84 | ||
85 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); | 85 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); |
86 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 86 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
87 | 87 | ||
88 | static struct rcu_state *rcu_state; | 88 | static struct rcu_state *rcu_state; |
89 | 89 | ||
90 | /* | 90 | /* |
91 | * The rcu_scheduler_active variable transitions from zero to one just | 91 | * The rcu_scheduler_active variable transitions from zero to one just |
92 | * before the first task is spawned. So when this variable is zero, RCU | 92 | * before the first task is spawned. So when this variable is zero, RCU |
93 | * can assume that there is but one task, allowing RCU to (for example) | 93 | * can assume that there is but one task, allowing RCU to (for example) |
94 | * optimized synchronize_sched() to a simple barrier(). When this variable | 94 | * optimized synchronize_sched() to a simple barrier(). When this variable |
95 | * is one, RCU must actually do all the hard work required to detect real | 95 | * is one, RCU must actually do all the hard work required to detect real |
96 | * grace periods. This variable is also used to suppress boot-time false | 96 | * grace periods. This variable is also used to suppress boot-time false |
97 | * positives from lockdep-RCU error checking. | 97 | * positives from lockdep-RCU error checking. |
98 | */ | 98 | */ |
99 | int rcu_scheduler_active __read_mostly; | 99 | int rcu_scheduler_active __read_mostly; |
100 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | 100 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); |
101 | 101 | ||
102 | /* | 102 | /* |
103 | * The rcu_scheduler_fully_active variable transitions from zero to one | 103 | * The rcu_scheduler_fully_active variable transitions from zero to one |
104 | * during the early_initcall() processing, which is after the scheduler | 104 | * during the early_initcall() processing, which is after the scheduler |
105 | * is capable of creating new tasks. So RCU processing (for example, | 105 | * is capable of creating new tasks. So RCU processing (for example, |
106 | * creating tasks for RCU priority boosting) must be delayed until after | 106 | * creating tasks for RCU priority boosting) must be delayed until after |
107 | * rcu_scheduler_fully_active transitions from zero to one. We also | 107 | * rcu_scheduler_fully_active transitions from zero to one. We also |
108 | * currently delay invocation of any RCU callbacks until after this point. | 108 | * currently delay invocation of any RCU callbacks until after this point. |
109 | * | 109 | * |
110 | * It might later prove better for people registering RCU callbacks during | 110 | * It might later prove better for people registering RCU callbacks during |
111 | * early boot to take responsibility for these callbacks, but one step at | 111 | * early boot to take responsibility for these callbacks, but one step at |
112 | * a time. | 112 | * a time. |
113 | */ | 113 | */ |
114 | static int rcu_scheduler_fully_active __read_mostly; | 114 | static int rcu_scheduler_fully_active __read_mostly; |
115 | 115 | ||
116 | #ifdef CONFIG_RCU_BOOST | 116 | #ifdef CONFIG_RCU_BOOST |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Control variables for per-CPU and per-rcu_node kthreads. These | 119 | * Control variables for per-CPU and per-rcu_node kthreads. These |
120 | * handle all flavors of RCU. | 120 | * handle all flavors of RCU. |
121 | */ | 121 | */ |
122 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | 122 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
123 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 123 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
124 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); | 124 | DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); |
125 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 125 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
126 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | 126 | DEFINE_PER_CPU(char, rcu_cpu_has_work); |
127 | 127 | ||
128 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 128 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
129 | 129 | ||
130 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 130 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
131 | static void invoke_rcu_core(void); | 131 | static void invoke_rcu_core(void); |
132 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 132 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * Track the rcutorture test sequence number and the update version | 135 | * Track the rcutorture test sequence number and the update version |
136 | * number within a given test. The rcutorture_testseq is incremented | 136 | * number within a given test. The rcutorture_testseq is incremented |
137 | * on every rcutorture module load and unload, so has an odd value | 137 | * on every rcutorture module load and unload, so has an odd value |
138 | * when a test is running. The rcutorture_vernum is set to zero | 138 | * when a test is running. The rcutorture_vernum is set to zero |
139 | * when rcutorture starts and is incremented on each rcutorture update. | 139 | * when rcutorture starts and is incremented on each rcutorture update. |
140 | * These variables enable correlating rcutorture output with the | 140 | * These variables enable correlating rcutorture output with the |
141 | * RCU tracing information. | 141 | * RCU tracing information. |
142 | */ | 142 | */ |
143 | unsigned long rcutorture_testseq; | 143 | unsigned long rcutorture_testseq; |
144 | unsigned long rcutorture_vernum; | 144 | unsigned long rcutorture_vernum; |
145 | 145 | ||
146 | /* | 146 | /* |
147 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 147 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
148 | * permit this function to be invoked without holding the root rcu_node | 148 | * permit this function to be invoked without holding the root rcu_node |
149 | * structure's ->lock, but of course results can be subject to change. | 149 | * structure's ->lock, but of course results can be subject to change. |
150 | */ | 150 | */ |
151 | static int rcu_gp_in_progress(struct rcu_state *rsp) | 151 | static int rcu_gp_in_progress(struct rcu_state *rsp) |
152 | { | 152 | { |
153 | return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); | 153 | return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum); |
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * Note a quiescent state. Because we do not need to know | 157 | * Note a quiescent state. Because we do not need to know |
158 | * how many quiescent states passed, just if there was at least | 158 | * how many quiescent states passed, just if there was at least |
159 | * one since the start of the grace period, this just sets a flag. | 159 | * one since the start of the grace period, this just sets a flag. |
160 | * The caller must have disabled preemption. | 160 | * The caller must have disabled preemption. |
161 | */ | 161 | */ |
162 | void rcu_sched_qs(int cpu) | 162 | void rcu_sched_qs(int cpu) |
163 | { | 163 | { |
164 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); | 164 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
165 | 165 | ||
166 | rdp->passed_quiesce_gpnum = rdp->gpnum; | 166 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
167 | barrier(); | 167 | barrier(); |
168 | if (rdp->passed_quiesce == 0) | 168 | if (rdp->passed_quiesce == 0) |
169 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); | 169 | trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); |
170 | rdp->passed_quiesce = 1; | 170 | rdp->passed_quiesce = 1; |
171 | } | 171 | } |
172 | 172 | ||
173 | void rcu_bh_qs(int cpu) | 173 | void rcu_bh_qs(int cpu) |
174 | { | 174 | { |
175 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | 175 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
176 | 176 | ||
177 | rdp->passed_quiesce_gpnum = rdp->gpnum; | 177 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
178 | barrier(); | 178 | barrier(); |
179 | if (rdp->passed_quiesce == 0) | 179 | if (rdp->passed_quiesce == 0) |
180 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); | 180 | trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); |
181 | rdp->passed_quiesce = 1; | 181 | rdp->passed_quiesce = 1; |
182 | } | 182 | } |
183 | 183 | ||
184 | /* | 184 | /* |
185 | * Note a context switch. This is a quiescent state for RCU-sched, | 185 | * Note a context switch. This is a quiescent state for RCU-sched, |
186 | * and requires special handling for preemptible RCU. | 186 | * and requires special handling for preemptible RCU. |
187 | * The caller must have disabled preemption. | 187 | * The caller must have disabled preemption. |
188 | */ | 188 | */ |
189 | void rcu_note_context_switch(int cpu) | 189 | void rcu_note_context_switch(int cpu) |
190 | { | 190 | { |
191 | trace_rcu_utilization("Start context switch"); | 191 | trace_rcu_utilization("Start context switch"); |
192 | rcu_sched_qs(cpu); | 192 | rcu_sched_qs(cpu); |
193 | rcu_preempt_note_context_switch(cpu); | 193 | rcu_preempt_note_context_switch(cpu); |
194 | trace_rcu_utilization("End context switch"); | 194 | trace_rcu_utilization("End context switch"); |
195 | } | 195 | } |
196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 196 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
197 | 197 | ||
198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 198 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
199 | .dynticks_nesting = DYNTICK_TASK_NESTING, | 199 | .dynticks_nesting = DYNTICK_TASK_NESTING, |
200 | .dynticks = ATOMIC_INIT(1), | 200 | .dynticks = ATOMIC_INIT(1), |
201 | }; | 201 | }; |
202 | 202 | ||
203 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ | 203 | static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ |
204 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ | 204 | static int qhimark = 10000; /* If this many pending, ignore blimit. */ |
205 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ | 205 | static int qlowmark = 100; /* Once only this many pending, use blimit. */ |
206 | 206 | ||
207 | module_param(blimit, int, 0); | 207 | module_param(blimit, int, 0); |
208 | module_param(qhimark, int, 0); | 208 | module_param(qhimark, int, 0); |
209 | module_param(qlowmark, int, 0); | 209 | module_param(qlowmark, int, 0); |
210 | 210 | ||
211 | int rcu_cpu_stall_suppress __read_mostly; | 211 | int rcu_cpu_stall_suppress __read_mostly; |
212 | module_param(rcu_cpu_stall_suppress, int, 0644); | 212 | module_param(rcu_cpu_stall_suppress, int, 0644); |
213 | 213 | ||
214 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); | 214 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed); |
215 | static int rcu_pending(int cpu); | 215 | static int rcu_pending(int cpu); |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * Return the number of RCU-sched batches processed thus far for debug & stats. | 218 | * Return the number of RCU-sched batches processed thus far for debug & stats. |
219 | */ | 219 | */ |
220 | long rcu_batches_completed_sched(void) | 220 | long rcu_batches_completed_sched(void) |
221 | { | 221 | { |
222 | return rcu_sched_state.completed; | 222 | return rcu_sched_state.completed; |
223 | } | 223 | } |
224 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | 224 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); |
225 | 225 | ||
226 | /* | 226 | /* |
227 | * Return the number of RCU BH batches processed thus far for debug & stats. | 227 | * Return the number of RCU BH batches processed thus far for debug & stats. |
228 | */ | 228 | */ |
229 | long rcu_batches_completed_bh(void) | 229 | long rcu_batches_completed_bh(void) |
230 | { | 230 | { |
231 | return rcu_bh_state.completed; | 231 | return rcu_bh_state.completed; |
232 | } | 232 | } |
233 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | 233 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Force a quiescent state for RCU BH. | 236 | * Force a quiescent state for RCU BH. |
237 | */ | 237 | */ |
238 | void rcu_bh_force_quiescent_state(void) | 238 | void rcu_bh_force_quiescent_state(void) |
239 | { | 239 | { |
240 | force_quiescent_state(&rcu_bh_state, 0); | 240 | force_quiescent_state(&rcu_bh_state, 0); |
241 | } | 241 | } |
242 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | 242 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
243 | 243 | ||
244 | /* | 244 | /* |
245 | * Record the number of times rcutorture tests have been initiated and | 245 | * Record the number of times rcutorture tests have been initiated and |
246 | * terminated. This information allows the debugfs tracing stats to be | 246 | * terminated. This information allows the debugfs tracing stats to be |
247 | * correlated to the rcutorture messages, even when the rcutorture module | 247 | * correlated to the rcutorture messages, even when the rcutorture module |
248 | * is being repeatedly loaded and unloaded. In other words, we cannot | 248 | * is being repeatedly loaded and unloaded. In other words, we cannot |
249 | * store this state in rcutorture itself. | 249 | * store this state in rcutorture itself. |
250 | */ | 250 | */ |
251 | void rcutorture_record_test_transition(void) | 251 | void rcutorture_record_test_transition(void) |
252 | { | 252 | { |
253 | rcutorture_testseq++; | 253 | rcutorture_testseq++; |
254 | rcutorture_vernum = 0; | 254 | rcutorture_vernum = 0; |
255 | } | 255 | } |
256 | EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); | 256 | EXPORT_SYMBOL_GPL(rcutorture_record_test_transition); |
257 | 257 | ||
258 | /* | 258 | /* |
259 | * Record the number of writer passes through the current rcutorture test. | 259 | * Record the number of writer passes through the current rcutorture test. |
260 | * This is also used to correlate debugfs tracing stats with the rcutorture | 260 | * This is also used to correlate debugfs tracing stats with the rcutorture |
261 | * messages. | 261 | * messages. |
262 | */ | 262 | */ |
263 | void rcutorture_record_progress(unsigned long vernum) | 263 | void rcutorture_record_progress(unsigned long vernum) |
264 | { | 264 | { |
265 | rcutorture_vernum++; | 265 | rcutorture_vernum++; |
266 | } | 266 | } |
267 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); | 267 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * Force a quiescent state for RCU-sched. | 270 | * Force a quiescent state for RCU-sched. |
271 | */ | 271 | */ |
272 | void rcu_sched_force_quiescent_state(void) | 272 | void rcu_sched_force_quiescent_state(void) |
273 | { | 273 | { |
274 | force_quiescent_state(&rcu_sched_state, 0); | 274 | force_quiescent_state(&rcu_sched_state, 0); |
275 | } | 275 | } |
276 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | 276 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); |
277 | 277 | ||
278 | /* | 278 | /* |
279 | * Does the CPU have callbacks ready to be invoked? | 279 | * Does the CPU have callbacks ready to be invoked? |
280 | */ | 280 | */ |
281 | static int | 281 | static int |
282 | cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) | 282 | cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) |
283 | { | 283 | { |
284 | return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; | 284 | return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; |
285 | } | 285 | } |
286 | 286 | ||
287 | /* | 287 | /* |
288 | * Does the current CPU require a not-yet-started grace period? | 288 | * Does the current CPU require a not-yet-started grace period? |
289 | */ | 289 | */ |
290 | static int | 290 | static int |
291 | cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) | 291 | cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) |
292 | { | 292 | { |
293 | return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); | 293 | return *rdp->nxttail[RCU_DONE_TAIL] && !rcu_gp_in_progress(rsp); |
294 | } | 294 | } |
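
Both predicates above boil down to pointer comparisons against the segmented callback list carried in each rcu_data structure: ->nxtlist is a singly linked list, and each ->nxttail[] entry points at the ->next pointer (or at ->nxtlist itself when the segment is empty) that terminates the corresponding segment. A minimal userspace sketch of that layout, with invented names that are not the kernel's, might look like:

	#include <stddef.h>

	/* Toy model of the per-CPU segmented callback list (illustrative only). */
	enum { TOY_DONE, TOY_WAIT, TOY_NEXT_READY, TOY_NEXT, TOY_NUM_TAILS };

	struct toy_cb {
		struct toy_cb *next;
	};

	struct toy_cblist {
		struct toy_cb *head;			/* plays the role of ->nxtlist */
		struct toy_cb **tail[TOY_NUM_TAILS];	/* plays the role of ->nxttail[] */
	};

	/* Any callbacks whose grace period has already ended? */
	static int toy_has_done_cbs(struct toy_cblist *cl)
	{
		return &cl->head != cl->tail[TOY_DONE];
	}

	/* Any callbacks queued beyond the done segment, i.e. still needing a GP? */
	static int toy_needs_gp(struct toy_cblist *cl)
	{
		return *cl->tail[TOY_DONE] != NULL;
	}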
295 | 295 | ||
296 | /* | 296 | /* |
297 | * Return the root node of the specified rcu_state structure. | 297 | * Return the root node of the specified rcu_state structure. |
298 | */ | 298 | */ |
299 | static struct rcu_node *rcu_get_root(struct rcu_state *rsp) | 299 | static struct rcu_node *rcu_get_root(struct rcu_state *rsp) |
300 | { | 300 | { |
301 | return &rsp->node[0]; | 301 | return &rsp->node[0]; |
302 | } | 302 | } |
303 | 303 | ||
304 | #ifdef CONFIG_SMP | 304 | #ifdef CONFIG_SMP |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * If the specified CPU is offline, tell the caller that it is in | 307 | * If the specified CPU is offline, tell the caller that it is in |
308 | * a quiescent state. Otherwise, whack it with a reschedule IPI. | 308 | * a quiescent state. Otherwise, whack it with a reschedule IPI. |
309 | * Grace periods can end up waiting on an offline CPU when that | 309 | * Grace periods can end up waiting on an offline CPU when that |
310 | * CPU is in the process of coming online -- it will be added to the | 310 | * CPU is in the process of coming online -- it will be added to the |
311 | * rcu_node bitmasks before it actually makes it online. The same thing | 311 | * rcu_node bitmasks before it actually makes it online. The same thing |
312 | * can happen while a CPU is in the process of going offline. Because this | 312 | * can happen while a CPU is in the process of going offline. Because this |
313 | * race is quite rare, we check for it after detecting that the grace | 313 | * race is quite rare, we check for it after detecting that the grace |
314 | * period has been delayed rather than checking each and every CPU | 314 | * period has been delayed rather than checking each and every CPU |
315 | * each and every time we start a new grace period. | 315 | * each and every time we start a new grace period. |
316 | */ | 316 | */ |
317 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) | 317 | static int rcu_implicit_offline_qs(struct rcu_data *rdp) |
318 | { | 318 | { |
319 | /* | 319 | /* |
320 | * If the CPU is offline, it is in a quiescent state. We can | 320 | * If the CPU is offline, it is in a quiescent state. We can |
321 | * trust its state not to change because interrupts are disabled. | 321 | * trust its state not to change because interrupts are disabled. |
322 | */ | 322 | */ |
323 | if (cpu_is_offline(rdp->cpu)) { | 323 | if (cpu_is_offline(rdp->cpu)) { |
324 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); | 324 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); |
325 | rdp->offline_fqs++; | 325 | rdp->offline_fqs++; |
326 | return 1; | 326 | return 1; |
327 | } | 327 | } |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * The CPU is online, so send it a reschedule IPI. This forces | 330 | * The CPU is online, so send it a reschedule IPI. This forces |
331 | * it through the scheduler, and (inefficiently) also handles cases | 331 | * it through the scheduler, and (inefficiently) also handles cases |
332 | * where idle loops fail to inform RCU about the CPU being idle. | 332 | * where idle loops fail to inform RCU about the CPU being idle. |
333 | */ | 333 | */ |
334 | if (rdp->cpu != smp_processor_id()) | 334 | if (rdp->cpu != smp_processor_id()) |
335 | smp_send_reschedule(rdp->cpu); | 335 | smp_send_reschedule(rdp->cpu); |
336 | else | 336 | else |
337 | set_need_resched(); | 337 | set_need_resched(); |
338 | rdp->resched_ipi++; | 338 | rdp->resched_ipi++; |
339 | return 0; | 339 | return 0; |
340 | } | 340 | } |
341 | 341 | ||
342 | #endif /* #ifdef CONFIG_SMP */ | 342 | #endif /* #ifdef CONFIG_SMP */ |
343 | 343 | ||
344 | /* | 344 | /* |
345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle | 345 | * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle |
346 | * | 346 | * |
347 | * If the new value of the ->dynticks_nesting counter now is zero, | 347 | * If the new value of the ->dynticks_nesting counter now is zero, |
348 | * we really have entered idle, and must do the appropriate accounting. | 348 | * we really have entered idle, and must do the appropriate accounting. |
349 | * The caller must have disabled interrupts. | 349 | * The caller must have disabled interrupts. |
350 | */ | 350 | */ |
351 | static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) | 351 | static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval) |
352 | { | 352 | { |
353 | if (rdtp->dynticks_nesting) { | 353 | if (rdtp->dynticks_nesting) { |
354 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); | 354 | trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting); |
355 | return; | 355 | return; |
356 | } | 356 | } |
357 | trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); | 357 | trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); |
358 | if (!is_idle_task(current)) { | 358 | if (!is_idle_task(current)) { |
359 | struct task_struct *idle = idle_task(smp_processor_id()); | 359 | struct task_struct *idle = idle_task(smp_processor_id()); |
360 | 360 | ||
361 | trace_rcu_dyntick("Error on entry: not idle task", | 361 | trace_rcu_dyntick("Error on entry: not idle task", |
362 | oldval, rdtp->dynticks_nesting); | 362 | oldval, rdtp->dynticks_nesting); |
363 | ftrace_dump(DUMP_ALL); | 363 | ftrace_dump(DUMP_ALL); |
364 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 364 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
365 | current->pid, current->comm, | 365 | current->pid, current->comm, |
366 | idle->pid, idle->comm); /* must be idle task! */ | 366 | idle->pid, idle->comm); /* must be idle task! */ |
367 | } | 367 | } |
368 | rcu_prepare_for_idle(smp_processor_id()); | 368 | rcu_prepare_for_idle(smp_processor_id()); |
369 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 369 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
370 | smp_mb__before_atomic_inc(); /* See above. */ | 370 | smp_mb__before_atomic_inc(); /* See above. */ |
371 | atomic_inc(&rdtp->dynticks); | 371 | atomic_inc(&rdtp->dynticks); |
372 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ | 372 | smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ |
373 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 373 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
374 | } | 374 | } |
375 | 375 | ||
376 | /** | 376 | /** |
377 | * rcu_idle_enter - inform RCU that current CPU is entering idle | 377 | * rcu_idle_enter - inform RCU that current CPU is entering idle |
378 | * | 378 | * |
379 | * Enter idle mode, in other words, -leave- the mode in which RCU | 379 | * Enter idle mode, in other words, -leave- the mode in which RCU |
380 | * read-side critical sections can occur. (Though RCU read-side | 380 | * read-side critical sections can occur. (Though RCU read-side |
381 | * critical sections can occur in irq handlers in idle, a possibility | 381 | * critical sections can occur in irq handlers in idle, a possibility |
382 | * handled by irq_enter() and irq_exit().) | 382 | * handled by irq_enter() and irq_exit().) |
383 | * | 383 | * |
384 | * We crowbar the ->dynticks_nesting field to zero to allow for | 384 | * We crowbar the ->dynticks_nesting field to zero to allow for |
385 | * the possibility of usermode upcalls having messed up our count | 385 | * the possibility of usermode upcalls having messed up our count |
386 | * of interrupt nesting level during the prior busy period. | 386 | * of interrupt nesting level during the prior busy period. |
387 | */ | 387 | */ |
388 | void rcu_idle_enter(void) | 388 | void rcu_idle_enter(void) |
389 | { | 389 | { |
390 | unsigned long flags; | 390 | unsigned long flags; |
391 | long long oldval; | 391 | long long oldval; |
392 | struct rcu_dynticks *rdtp; | 392 | struct rcu_dynticks *rdtp; |
393 | 393 | ||
394 | local_irq_save(flags); | 394 | local_irq_save(flags); |
395 | rdtp = &__get_cpu_var(rcu_dynticks); | 395 | rdtp = &__get_cpu_var(rcu_dynticks); |
396 | oldval = rdtp->dynticks_nesting; | 396 | oldval = rdtp->dynticks_nesting; |
397 | rdtp->dynticks_nesting = 0; | 397 | rdtp->dynticks_nesting = 0; |
398 | rcu_idle_enter_common(rdtp, oldval); | 398 | rcu_idle_enter_common(rdtp, oldval); |
399 | local_irq_restore(flags); | 399 | local_irq_restore(flags); |
400 | } | 400 | } |
401 | 401 | ||
402 | /** | 402 | /** |
403 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle | 403 | * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle |
404 | * | 404 | * |
405 | * Exit from an interrupt handler, which might possibly result in entering | 405 | * Exit from an interrupt handler, which might possibly result in entering |
406 | * idle mode, in other words, leaving the mode in which read-side critical | 406 | * idle mode, in other words, leaving the mode in which read-side critical |
407 | * sections can occur. | 407 | * sections can occur. |
408 | * | 408 | * |
409 | * This code assumes that the idle loop never does anything that might | 409 | * This code assumes that the idle loop never does anything that might |
410 | * result in unbalanced calls to irq_enter() and irq_exit(). If your | 410 | * result in unbalanced calls to irq_enter() and irq_exit(). If your |
411 | * architecture violates this assumption, RCU will give you what you | 411 | * architecture violates this assumption, RCU will give you what you |
412 | * deserve, good and hard. But very infrequently and irreproducibly. | 412 | * deserve, good and hard. But very infrequently and irreproducibly. |
413 | * | 413 | * |
414 | * Use things like work queues to work around this limitation. | 414 | * Use things like work queues to work around this limitation. |
415 | * | 415 | * |
416 | * You have been warned. | 416 | * You have been warned. |
417 | */ | 417 | */ |
418 | void rcu_irq_exit(void) | 418 | void rcu_irq_exit(void) |
419 | { | 419 | { |
420 | unsigned long flags; | 420 | unsigned long flags; |
421 | long long oldval; | 421 | long long oldval; |
422 | struct rcu_dynticks *rdtp; | 422 | struct rcu_dynticks *rdtp; |
423 | 423 | ||
424 | local_irq_save(flags); | 424 | local_irq_save(flags); |
425 | rdtp = &__get_cpu_var(rcu_dynticks); | 425 | rdtp = &__get_cpu_var(rcu_dynticks); |
426 | oldval = rdtp->dynticks_nesting; | 426 | oldval = rdtp->dynticks_nesting; |
427 | rdtp->dynticks_nesting--; | 427 | rdtp->dynticks_nesting--; |
428 | WARN_ON_ONCE(rdtp->dynticks_nesting < 0); | 428 | WARN_ON_ONCE(rdtp->dynticks_nesting < 0); |
429 | rcu_idle_enter_common(rdtp, oldval); | 429 | rcu_idle_enter_common(rdtp, oldval); |
430 | local_irq_restore(flags); | 430 | local_irq_restore(flags); |
431 | } | 431 | } |
432 | 432 | ||
433 | /* | 433 | /* |
434 | * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle | 434 | * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle |
435 | * | 435 | * |
436 | * If the new value of the ->dynticks_nesting counter was previously zero, | 436 | * If the new value of the ->dynticks_nesting counter was previously zero, |
437 | * we really have exited idle, and must do the appropriate accounting. | 437 | * we really have exited idle, and must do the appropriate accounting. |
438 | * The caller must have disabled interrupts. | 438 | * The caller must have disabled interrupts. |
439 | */ | 439 | */ |
440 | static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) | 440 | static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) |
441 | { | 441 | { |
442 | if (oldval) { | 442 | if (oldval) { |
443 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); | 443 | trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting); |
444 | return; | 444 | return; |
445 | } | 445 | } |
446 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ | 446 | smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ |
447 | atomic_inc(&rdtp->dynticks); | 447 | atomic_inc(&rdtp->dynticks); |
448 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 448 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
449 | smp_mb__after_atomic_inc(); /* See above. */ | 449 | smp_mb__after_atomic_inc(); /* See above. */ |
450 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 450 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
| | 451 | rcu_cleanup_after_idle(smp_processor_id()); |
451 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); | 452 | trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting); |
452 | if (!is_idle_task(current)) { | 453 | if (!is_idle_task(current)) { |
453 | struct task_struct *idle = idle_task(smp_processor_id()); | 454 | struct task_struct *idle = idle_task(smp_processor_id()); |
454 | 455 | ||
455 | trace_rcu_dyntick("Error on exit: not idle task", | 456 | trace_rcu_dyntick("Error on exit: not idle task", |
456 | oldval, rdtp->dynticks_nesting); | 457 | oldval, rdtp->dynticks_nesting); |
457 | ftrace_dump(DUMP_ALL); | 458 | ftrace_dump(DUMP_ALL); |
458 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", | 459 | WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", |
459 | current->pid, current->comm, | 460 | current->pid, current->comm, |
460 | idle->pid, idle->comm); /* must be idle task! */ | 461 | idle->pid, idle->comm); /* must be idle task! */ |
461 | } | 462 | } |
462 | } | 463 | } |
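
The WARN_ON_ONCE() checks in rcu_idle_enter_common() and rcu_idle_exit_common() enforce a simple parity convention on the ->dynticks counter: it is even while the CPU is in dyntick-idle mode and odd otherwise, with full memory barriers around each increment. A rough userspace analogue of that convention (sequentially consistent atomics standing in for the explicit smp_mb() calls; names are made up):

	#include <assert.h>
	#include <stdatomic.h>

	static atomic_uint toy_dynticks = 1;	/* odd: CPU starts out non-idle */

	static void toy_idle_enter(void)
	{
		/* seq_cst fetch_add orders prior accesses before the transition. */
		unsigned int v = atomic_fetch_add(&toy_dynticks, 1) + 1;

		assert((v & 0x1) == 0);		/* even while idle */
	}

	static void toy_idle_exit(void)
	{
		unsigned int v = atomic_fetch_add(&toy_dynticks, 1) + 1;

		assert((v & 0x1) == 1);		/* odd while non-idle */
	}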
463 | 464 | ||
464 | /** | 465 | /** |
465 | * rcu_idle_exit - inform RCU that current CPU is leaving idle | 466 | * rcu_idle_exit - inform RCU that current CPU is leaving idle |
466 | * | 467 | * |
467 | * Exit idle mode, in other words, -enter- the mode in which RCU | 468 | * Exit idle mode, in other words, -enter- the mode in which RCU |
468 | * read-side critical sections can occur. | 469 | * read-side critical sections can occur. |
469 | * | 470 | * |
470 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to | 471 | * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to |
471 | * allow for the possibility of usermode upcalls messing up our count | 472 | * allow for the possibility of usermode upcalls messing up our count |
472 | * of interrupt nesting level during the busy period that is just | 473 | * of interrupt nesting level during the busy period that is just |
473 | * now starting. | 474 | * now starting. |
474 | */ | 475 | */ |
475 | void rcu_idle_exit(void) | 476 | void rcu_idle_exit(void) |
476 | { | 477 | { |
477 | unsigned long flags; | 478 | unsigned long flags; |
478 | struct rcu_dynticks *rdtp; | 479 | struct rcu_dynticks *rdtp; |
479 | long long oldval; | 480 | long long oldval; |
480 | 481 | ||
481 | local_irq_save(flags); | 482 | local_irq_save(flags); |
482 | rdtp = &__get_cpu_var(rcu_dynticks); | 483 | rdtp = &__get_cpu_var(rcu_dynticks); |
483 | oldval = rdtp->dynticks_nesting; | 484 | oldval = rdtp->dynticks_nesting; |
484 | WARN_ON_ONCE(oldval != 0); | 485 | WARN_ON_ONCE(oldval != 0); |
485 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; | 486 | rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; |
486 | rcu_idle_exit_common(rdtp, oldval); | 487 | rcu_idle_exit_common(rdtp, oldval); |
487 | local_irq_restore(flags); | 488 | local_irq_restore(flags); |
488 | } | 489 | } |
489 | 490 | ||
490 | /** | 491 | /** |
491 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle | 492 | * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle |
492 | * | 493 | * |
493 | * Enter an interrupt handler, which might possibly result in exiting | 494 | * Enter an interrupt handler, which might possibly result in exiting |
494 | * idle mode, in other words, entering the mode in which read-side critical | 495 | * idle mode, in other words, entering the mode in which read-side critical |
495 | * sections can occur. | 496 | * sections can occur. |
496 | * | 497 | * |
497 | * Note that the Linux kernel is fully capable of entering an interrupt | 498 | * Note that the Linux kernel is fully capable of entering an interrupt |
498 | * handler that it never exits, for example when doing upcalls to | 499 | * handler that it never exits, for example when doing upcalls to |
499 | * user mode! This code assumes that the idle loop never does upcalls to | 500 | * user mode! This code assumes that the idle loop never does upcalls to |
500 | * user mode. If your architecture does do upcalls from the idle loop (or | 501 | * user mode. If your architecture does do upcalls from the idle loop (or |
501 | * does anything else that results in unbalanced calls to the irq_enter() | 502 | * does anything else that results in unbalanced calls to the irq_enter() |
502 | * and irq_exit() functions), RCU will give you what you deserve, good | 503 | * and irq_exit() functions), RCU will give you what you deserve, good |
503 | * and hard. But very infrequently and irreproducibly. | 504 | * and hard. But very infrequently and irreproducibly. |
504 | * | 505 | * |
505 | * Use things like work queues to work around this limitation. | 506 | * Use things like work queues to work around this limitation. |
506 | * | 507 | * |
507 | * You have been warned. | 508 | * You have been warned. |
508 | */ | 509 | */ |
509 | void rcu_irq_enter(void) | 510 | void rcu_irq_enter(void) |
510 | { | 511 | { |
511 | unsigned long flags; | 512 | unsigned long flags; |
512 | struct rcu_dynticks *rdtp; | 513 | struct rcu_dynticks *rdtp; |
513 | long long oldval; | 514 | long long oldval; |
514 | 515 | ||
515 | local_irq_save(flags); | 516 | local_irq_save(flags); |
516 | rdtp = &__get_cpu_var(rcu_dynticks); | 517 | rdtp = &__get_cpu_var(rcu_dynticks); |
517 | oldval = rdtp->dynticks_nesting; | 518 | oldval = rdtp->dynticks_nesting; |
518 | rdtp->dynticks_nesting++; | 519 | rdtp->dynticks_nesting++; |
519 | WARN_ON_ONCE(rdtp->dynticks_nesting == 0); | 520 | WARN_ON_ONCE(rdtp->dynticks_nesting == 0); |
520 | rcu_idle_exit_common(rdtp, oldval); | 521 | rcu_idle_exit_common(rdtp, oldval); |
521 | local_irq_restore(flags); | 522 | local_irq_restore(flags); |
522 | } | 523 | } |
523 | 524 | ||
524 | /** | 525 | /** |
525 | * rcu_nmi_enter - inform RCU of entry to NMI context | 526 | * rcu_nmi_enter - inform RCU of entry to NMI context |
526 | * | 527 | * |
527 | * If the CPU was idle with dynamic ticks active, and there is no | 528 | * If the CPU was idle with dynamic ticks active, and there is no |
528 | * irq handler running, this updates rdtp->dynticks to let the | 529 | * irq handler running, this updates rdtp->dynticks to let the |
529 | * RCU grace-period handling know that the CPU is active. | 530 | * RCU grace-period handling know that the CPU is active. |
530 | */ | 531 | */ |
531 | void rcu_nmi_enter(void) | 532 | void rcu_nmi_enter(void) |
532 | { | 533 | { |
533 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 534 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
534 | 535 | ||
535 | if (rdtp->dynticks_nmi_nesting == 0 && | 536 | if (rdtp->dynticks_nmi_nesting == 0 && |
536 | (atomic_read(&rdtp->dynticks) & 0x1)) | 537 | (atomic_read(&rdtp->dynticks) & 0x1)) |
537 | return; | 538 | return; |
538 | rdtp->dynticks_nmi_nesting++; | 539 | rdtp->dynticks_nmi_nesting++; |
539 | smp_mb__before_atomic_inc(); /* Force delay from prior write. */ | 540 | smp_mb__before_atomic_inc(); /* Force delay from prior write. */ |
540 | atomic_inc(&rdtp->dynticks); | 541 | atomic_inc(&rdtp->dynticks); |
541 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 542 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
542 | smp_mb__after_atomic_inc(); /* See above. */ | 543 | smp_mb__after_atomic_inc(); /* See above. */ |
543 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 544 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
544 | } | 545 | } |
545 | 546 | ||
546 | /** | 547 | /** |
547 | * rcu_nmi_exit - inform RCU of exit from NMI context | 548 | * rcu_nmi_exit - inform RCU of exit from NMI context |
548 | * | 549 | * |
549 | * If the CPU was idle with dynamic ticks active, and there is no | 550 | * If the CPU was idle with dynamic ticks active, and there is no |
550 | * irq handler running, this updates rdtp->dynticks to let the | 551 | * irq handler running, this updates rdtp->dynticks to let the |
551 | * RCU grace-period handling know that the CPU is no longer active. | 552 | * RCU grace-period handling know that the CPU is no longer active. |
552 | */ | 553 | */ |
553 | void rcu_nmi_exit(void) | 554 | void rcu_nmi_exit(void) |
554 | { | 555 | { |
555 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); | 556 | struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); |
556 | 557 | ||
557 | if (rdtp->dynticks_nmi_nesting == 0 || | 558 | if (rdtp->dynticks_nmi_nesting == 0 || |
558 | --rdtp->dynticks_nmi_nesting != 0) | 559 | --rdtp->dynticks_nmi_nesting != 0) |
559 | return; | 560 | return; |
560 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 561 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
561 | smp_mb__before_atomic_inc(); /* See above. */ | 562 | smp_mb__before_atomic_inc(); /* See above. */ |
562 | atomic_inc(&rdtp->dynticks); | 563 | atomic_inc(&rdtp->dynticks); |
563 | smp_mb__after_atomic_inc(); /* Force delay to next write. */ | 564 | smp_mb__after_atomic_inc(); /* Force delay to next write. */ |
564 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); | 565 | WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); |
565 | } | 566 | } |
566 | 567 | ||
567 | #ifdef CONFIG_PROVE_RCU | 568 | #ifdef CONFIG_PROVE_RCU |
568 | 569 | ||
569 | /** | 570 | /** |
570 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle | 571 | * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle |
571 | * | 572 | * |
572 | * If the current CPU is in its idle loop and is neither in an interrupt | 573 | * If the current CPU is in its idle loop and is neither in an interrupt |
573 | * handler nor in an NMI handler, return true. | 574 | * handler nor in an NMI handler, return true. |
574 | */ | 575 | */ |
575 | int rcu_is_cpu_idle(void) | 576 | int rcu_is_cpu_idle(void) |
576 | { | 577 | { |
577 | int ret; | 578 | int ret; |
578 | 579 | ||
579 | preempt_disable(); | 580 | preempt_disable(); |
580 | ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; | 581 | ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; |
581 | preempt_enable(); | 582 | preempt_enable(); |
582 | return ret; | 583 | return ret; |
583 | } | 584 | } |
584 | EXPORT_SYMBOL(rcu_is_cpu_idle); | 585 | EXPORT_SYMBOL(rcu_is_cpu_idle); |
585 | 586 | ||
586 | #endif /* #ifdef CONFIG_PROVE_RCU */ | 587 | #endif /* #ifdef CONFIG_PROVE_RCU */ |
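
Under CONFIG_PROVE_RCU a predicate like rcu_is_cpu_idle() is typically consumed by lockdep-style sanity checks. A purely illustrative consumer (hypothetical helper, not from this commit):

	/* Hypothetical debug helper: complain once if invoked from dyntick-idle. */
	static inline void toy_assert_not_rcu_idle(void)
	{
		WARN_ON_ONCE(rcu_is_cpu_idle());
	}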
587 | 588 | ||
588 | /** | 589 | /** |
589 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle | 590 | * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle |
590 | * | 591 | * |
591 | * If the current CPU is idle or running at a first-level (not nested) | 592 | * If the current CPU is idle or running at a first-level (not nested) |
592 | * interrupt from idle, return true. The caller must have at least | 593 | * interrupt from idle, return true. The caller must have at least |
593 | * disabled preemption. | 594 | * disabled preemption. |
594 | */ | 595 | */ |
595 | int rcu_is_cpu_rrupt_from_idle(void) | 596 | int rcu_is_cpu_rrupt_from_idle(void) |
596 | { | 597 | { |
597 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; | 598 | return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; |
598 | } | 599 | } |
599 | 600 | ||
600 | #ifdef CONFIG_SMP | 601 | #ifdef CONFIG_SMP |
601 | 602 | ||
602 | /* | 603 | /* |
603 | * Snapshot the specified CPU's dynticks counter so that we can later | 604 | * Snapshot the specified CPU's dynticks counter so that we can later |
604 | * credit them with an implicit quiescent state. Return 1 if this CPU | 605 | * credit them with an implicit quiescent state. Return 1 if this CPU |
605 | * is in dynticks idle mode, which is an extended quiescent state. | 606 | * is in dynticks idle mode, which is an extended quiescent state. |
606 | */ | 607 | */ |
607 | static int dyntick_save_progress_counter(struct rcu_data *rdp) | 608 | static int dyntick_save_progress_counter(struct rcu_data *rdp) |
608 | { | 609 | { |
609 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); | 610 | rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); |
610 | return (rdp->dynticks_snap & 0x1) == 0; | 611 | return (rdp->dynticks_snap & 0x1) == 0; |
611 | } | 612 | } |
612 | 613 | ||
613 | /* | 614 | /* |
614 | * Return true if the specified CPU has passed through a quiescent | 615 | * Return true if the specified CPU has passed through a quiescent |
615 | * state by virtue of being in or having passed through a dynticks | 616 | * state by virtue of being in or having passed through a dynticks |
616 | * idle state since the last call to dyntick_save_progress_counter() | 617 | * idle state since the last call to dyntick_save_progress_counter() |
617 | * for this same CPU. | 618 | * for this same CPU. |
618 | */ | 619 | */ |
619 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | 620 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) |
620 | { | 621 | { |
621 | unsigned int curr; | 622 | unsigned int curr; |
622 | unsigned int snap; | 623 | unsigned int snap; |
623 | 624 | ||
624 | curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); | 625 | curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); |
625 | snap = (unsigned int)rdp->dynticks_snap; | 626 | snap = (unsigned int)rdp->dynticks_snap; |
626 | 627 | ||
627 | /* | 628 | /* |
628 | * If the CPU passed through or entered a dynticks idle phase with | 629 | * If the CPU passed through or entered a dynticks idle phase with |
629 | * no active irq/NMI handlers, then we can safely pretend that the CPU | 630 | * no active irq/NMI handlers, then we can safely pretend that the CPU |
630 | * already acknowledged the request to pass through a quiescent | 631 | * already acknowledged the request to pass through a quiescent |
631 | * state. Either way, that CPU cannot possibly be in an RCU | 632 | * state. Either way, that CPU cannot possibly be in an RCU |
632 | * read-side critical section that started before the beginning | 633 | * read-side critical section that started before the beginning |
633 | * of the current RCU grace period. | 634 | * of the current RCU grace period. |
634 | */ | 635 | */ |
635 | if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { | 636 | if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { |
636 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); | 637 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); |
637 | rdp->dynticks_fqs++; | 638 | rdp->dynticks_fqs++; |
638 | return 1; | 639 | return 1; |
639 | } | 640 | } |
640 | 641 | ||
641 | /* Go check for the CPU being offline. */ | 642 | /* Go check for the CPU being offline. */ |
642 | return rcu_implicit_offline_qs(rdp); | 643 | return rcu_implicit_offline_qs(rdp); |
643 | } | 644 | } |
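
Taken together with the parity convention sketched earlier, the check above needs only a snapshot and a later re-read: either the counter is currently even (the CPU is idle right now) or it has advanced by at least two (the CPU passed through idle since the snapshot). Continuing the earlier toy counter, again with invented names:

	static unsigned int toy_snapshot(void)
	{
		return atomic_load(&toy_dynticks);
	}

	/* Nonzero if the CPU is now idle or has been idle since the snapshot. */
	static int toy_in_or_through_idle(unsigned int snap)
	{
		unsigned int curr = atomic_load(&toy_dynticks);

		/* Unsigned subtraction makes the >= 2 test wraparound-safe. */
		return (curr & 0x1) == 0 || curr - snap >= 2;
	}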
644 | 645 | ||
645 | #endif /* #ifdef CONFIG_SMP */ | 646 | #endif /* #ifdef CONFIG_SMP */ |
646 | 647 | ||
647 | int rcu_cpu_stall_suppress __read_mostly; | 648 | int rcu_cpu_stall_suppress __read_mostly; |
648 | 649 | ||
649 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 650 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
650 | { | 651 | { |
651 | rsp->gp_start = jiffies; | 652 | rsp->gp_start = jiffies; |
652 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; | 653 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; |
653 | } | 654 | } |
654 | 655 | ||
655 | static void print_other_cpu_stall(struct rcu_state *rsp) | 656 | static void print_other_cpu_stall(struct rcu_state *rsp) |
656 | { | 657 | { |
657 | int cpu; | 658 | int cpu; |
658 | long delta; | 659 | long delta; |
659 | unsigned long flags; | 660 | unsigned long flags; |
660 | int ndetected; | 661 | int ndetected; |
661 | struct rcu_node *rnp = rcu_get_root(rsp); | 662 | struct rcu_node *rnp = rcu_get_root(rsp); |
662 | 663 | ||
663 | /* Only let one CPU complain about others per time interval. */ | 664 | /* Only let one CPU complain about others per time interval. */ |
664 | 665 | ||
665 | raw_spin_lock_irqsave(&rnp->lock, flags); | 666 | raw_spin_lock_irqsave(&rnp->lock, flags); |
666 | delta = jiffies - rsp->jiffies_stall; | 667 | delta = jiffies - rsp->jiffies_stall; |
667 | if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { | 668 | if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { |
668 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 669 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
669 | return; | 670 | return; |
670 | } | 671 | } |
671 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 672 | rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; |
672 | 673 | ||
673 | /* | 674 | /* |
674 | * Now rat on any tasks that got kicked up to the root rcu_node | 675 | * Now rat on any tasks that got kicked up to the root rcu_node |
675 | * due to CPU offlining. | 676 | * due to CPU offlining. |
676 | */ | 677 | */ |
677 | ndetected = rcu_print_task_stall(rnp); | 678 | ndetected = rcu_print_task_stall(rnp); |
678 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 679 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
679 | 680 | ||
680 | /* | 681 | /* |
681 | * OK, time to rat on our buddy... | 682 | * OK, time to rat on our buddy... |
682 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 683 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
683 | * RCU CPU stall warnings. | 684 | * RCU CPU stall warnings. |
684 | */ | 685 | */ |
685 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", | 686 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
686 | rsp->name); | 687 | rsp->name); |
687 | rcu_for_each_leaf_node(rsp, rnp) { | 688 | rcu_for_each_leaf_node(rsp, rnp) { |
688 | raw_spin_lock_irqsave(&rnp->lock, flags); | 689 | raw_spin_lock_irqsave(&rnp->lock, flags); |
689 | ndetected += rcu_print_task_stall(rnp); | 690 | ndetected += rcu_print_task_stall(rnp); |
690 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 691 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
691 | if (rnp->qsmask == 0) | 692 | if (rnp->qsmask == 0) |
692 | continue; | 693 | continue; |
693 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) | 694 | for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) |
694 | if (rnp->qsmask & (1UL << cpu)) { | 695 | if (rnp->qsmask & (1UL << cpu)) { |
695 | printk(" %d", rnp->grplo + cpu); | 696 | printk(" %d", rnp->grplo + cpu); |
696 | ndetected++; | 697 | ndetected++; |
697 | } | 698 | } |
698 | } | 699 | } |
699 | printk("} (detected by %d, t=%ld jiffies)\n", | 700 | printk("} (detected by %d, t=%ld jiffies)\n", |
700 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 701 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
701 | if (ndetected == 0) | 702 | if (ndetected == 0) |
702 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); | 703 | printk(KERN_ERR "INFO: Stall ended before state dump start\n"); |
703 | else if (!trigger_all_cpu_backtrace()) | 704 | else if (!trigger_all_cpu_backtrace()) |
704 | dump_stack(); | 705 | dump_stack(); |
705 | 706 | ||
706 | /* If so configured, complain about tasks blocking the grace period. */ | 707 | /* If so configured, complain about tasks blocking the grace period. */ |
707 | 708 | ||
708 | rcu_print_detail_task_stall(rsp); | 709 | rcu_print_detail_task_stall(rsp); |
709 | 710 | ||
710 | force_quiescent_state(rsp, 0); /* Kick them all. */ | 711 | force_quiescent_state(rsp, 0); /* Kick them all. */ |
711 | } | 712 | } |
712 | 713 | ||
713 | static void print_cpu_stall(struct rcu_state *rsp) | 714 | static void print_cpu_stall(struct rcu_state *rsp) |
714 | { | 715 | { |
715 | unsigned long flags; | 716 | unsigned long flags; |
716 | struct rcu_node *rnp = rcu_get_root(rsp); | 717 | struct rcu_node *rnp = rcu_get_root(rsp); |
717 | 718 | ||
718 | /* | 719 | /* |
719 | * OK, time to rat on ourselves... | 720 | * OK, time to rat on ourselves... |
720 | * See Documentation/RCU/stallwarn.txt for info on how to debug | 721 | * See Documentation/RCU/stallwarn.txt for info on how to debug |
721 | * RCU CPU stall warnings. | 722 | * RCU CPU stall warnings. |
722 | */ | 723 | */ |
723 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", | 724 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
724 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); | 725 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
725 | if (!trigger_all_cpu_backtrace()) | 726 | if (!trigger_all_cpu_backtrace()) |
726 | dump_stack(); | 727 | dump_stack(); |
727 | 728 | ||
728 | raw_spin_lock_irqsave(&rnp->lock, flags); | 729 | raw_spin_lock_irqsave(&rnp->lock, flags); |
729 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) | 730 | if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) |
730 | rsp->jiffies_stall = | 731 | rsp->jiffies_stall = |
731 | jiffies + RCU_SECONDS_TILL_STALL_RECHECK; | 732 | jiffies + RCU_SECONDS_TILL_STALL_RECHECK; |
732 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 733 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
733 | 734 | ||
734 | set_need_resched(); /* kick ourselves to get things going. */ | 735 | set_need_resched(); /* kick ourselves to get things going. */ |
735 | } | 736 | } |
736 | 737 | ||
737 | static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | 738 | static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) |
738 | { | 739 | { |
739 | unsigned long j; | 740 | unsigned long j; |
740 | unsigned long js; | 741 | unsigned long js; |
741 | struct rcu_node *rnp; | 742 | struct rcu_node *rnp; |
742 | 743 | ||
743 | if (rcu_cpu_stall_suppress) | 744 | if (rcu_cpu_stall_suppress) |
744 | return; | 745 | return; |
745 | j = ACCESS_ONCE(jiffies); | 746 | j = ACCESS_ONCE(jiffies); |
746 | js = ACCESS_ONCE(rsp->jiffies_stall); | 747 | js = ACCESS_ONCE(rsp->jiffies_stall); |
747 | rnp = rdp->mynode; | 748 | rnp = rdp->mynode; |
748 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { | 749 | if ((ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { |
749 | 750 | ||
750 | /* We haven't checked in, so go dump stack. */ | 751 | /* We haven't checked in, so go dump stack. */ |
751 | print_cpu_stall(rsp); | 752 | print_cpu_stall(rsp); |
752 | 753 | ||
753 | } else if (rcu_gp_in_progress(rsp) && | 754 | } else if (rcu_gp_in_progress(rsp) && |
754 | ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { | 755 | ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) { |
755 | 756 | ||
756 | /* They had a few time units to dump stack, so complain. */ | 757 | /* They had a few time units to dump stack, so complain. */ |
757 | print_other_cpu_stall(rsp); | 758 | print_other_cpu_stall(rsp); |
758 | } | 759 | } |
759 | } | 760 | } |
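
The ULONG_CMP_GE() comparisons above are the wraparound-safe way of asking whether jiffies has reached a recorded deadline; a plain j >= js would misbehave when the counter wraps. The kernel's helpers are defined along these lines (reproduced as a sketch with toy names, not as the authoritative definitions):

	#include <limits.h>

	/* Wraparound-safe ordering tests for unsigned long sequence values. */
	#define TOY_ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
	#define TOY_ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 <  (a) - (b))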
760 | 761 | ||
761 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | 762 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) |
762 | { | 763 | { |
763 | rcu_cpu_stall_suppress = 1; | 764 | rcu_cpu_stall_suppress = 1; |
764 | return NOTIFY_DONE; | 765 | return NOTIFY_DONE; |
765 | } | 766 | } |
766 | 767 | ||
767 | /** | 768 | /** |
768 | * rcu_cpu_stall_reset - prevent further stall warnings in current grace period | 769 | * rcu_cpu_stall_reset - prevent further stall warnings in current grace period |
769 | * | 770 | * |
770 | * Set the stall-warning timeout way off into the future, thus preventing | 771 | * Set the stall-warning timeout way off into the future, thus preventing |
771 | * any RCU CPU stall-warning messages from appearing in the current set of | 772 | * any RCU CPU stall-warning messages from appearing in the current set of |
772 | * RCU grace periods. | 773 | * RCU grace periods. |
773 | * | 774 | * |
774 | * The caller must disable hard irqs. | 775 | * The caller must disable hard irqs. |
775 | */ | 776 | */ |
776 | void rcu_cpu_stall_reset(void) | 777 | void rcu_cpu_stall_reset(void) |
777 | { | 778 | { |
778 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 779 | rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; |
779 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 780 | rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; |
780 | rcu_preempt_stall_reset(); | 781 | rcu_preempt_stall_reset(); |
781 | } | 782 | } |
782 | 783 | ||
783 | static struct notifier_block rcu_panic_block = { | 784 | static struct notifier_block rcu_panic_block = { |
784 | .notifier_call = rcu_panic, | 785 | .notifier_call = rcu_panic, |
785 | }; | 786 | }; |
786 | 787 | ||
787 | static void __init check_cpu_stall_init(void) | 788 | static void __init check_cpu_stall_init(void) |
788 | { | 789 | { |
789 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); | 790 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); |
790 | } | 791 | } |
791 | 792 | ||
792 | /* | 793 | /* |
793 | * Update CPU-local rcu_data state to record the newly noticed grace period. | 794 | * Update CPU-local rcu_data state to record the newly noticed grace period. |
794 | * This is used both when we started the grace period and when we notice | 795 | * This is used both when we started the grace period and when we notice |
795 | * that someone else started the grace period. The caller must hold the | 796 | * that someone else started the grace period. The caller must hold the |
796 | * ->lock of the leaf rcu_node structure corresponding to the current CPU, | 797 | * ->lock of the leaf rcu_node structure corresponding to the current CPU, |
797 | * and must have irqs disabled. | 798 | * and must have irqs disabled. |
798 | */ | 799 | */ |
799 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) | 800 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) |
800 | { | 801 | { |
801 | if (rdp->gpnum != rnp->gpnum) { | 802 | if (rdp->gpnum != rnp->gpnum) { |
802 | /* | 803 | /* |
803 | * If the current grace period is waiting for this CPU, | 804 | * If the current grace period is waiting for this CPU, |
804 | * set up to detect a quiescent state, otherwise don't | 805 | * set up to detect a quiescent state, otherwise don't |
805 | * go looking for one. | 806 | * go looking for one. |
806 | */ | 807 | */ |
807 | rdp->gpnum = rnp->gpnum; | 808 | rdp->gpnum = rnp->gpnum; |
808 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); | 809 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); |
809 | if (rnp->qsmask & rdp->grpmask) { | 810 | if (rnp->qsmask & rdp->grpmask) { |
810 | rdp->qs_pending = 1; | 811 | rdp->qs_pending = 1; |
811 | rdp->passed_quiesce = 0; | 812 | rdp->passed_quiesce = 0; |
812 | } else | 813 | } else |
813 | rdp->qs_pending = 0; | 814 | rdp->qs_pending = 0; |
814 | } | 815 | } |
815 | } | 816 | } |
816 | 817 | ||
817 | static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) | 818 | static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) |
818 | { | 819 | { |
819 | unsigned long flags; | 820 | unsigned long flags; |
820 | struct rcu_node *rnp; | 821 | struct rcu_node *rnp; |
821 | 822 | ||
822 | local_irq_save(flags); | 823 | local_irq_save(flags); |
823 | rnp = rdp->mynode; | 824 | rnp = rdp->mynode; |
824 | if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ | 825 | if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ |
825 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ | 826 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ |
826 | local_irq_restore(flags); | 827 | local_irq_restore(flags); |
827 | return; | 828 | return; |
828 | } | 829 | } |
829 | __note_new_gpnum(rsp, rnp, rdp); | 830 | __note_new_gpnum(rsp, rnp, rdp); |
830 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 831 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
831 | } | 832 | } |
832 | 833 | ||
833 | /* | 834 | /* |
834 | * Did someone else start a new RCU grace period since we last | 835 | * Did someone else start a new RCU grace period since we last |
835 | * checked? Update local state appropriately if so. Must be called | 836 | * checked? Update local state appropriately if so. Must be called |
836 | * on the CPU corresponding to rdp. | 837 | * on the CPU corresponding to rdp. |
837 | */ | 838 | */ |
838 | static int | 839 | static int |
839 | check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) | 840 | check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) |
840 | { | 841 | { |
841 | unsigned long flags; | 842 | unsigned long flags; |
842 | int ret = 0; | 843 | int ret = 0; |
843 | 844 | ||
844 | local_irq_save(flags); | 845 | local_irq_save(flags); |
845 | if (rdp->gpnum != rsp->gpnum) { | 846 | if (rdp->gpnum != rsp->gpnum) { |
846 | note_new_gpnum(rsp, rdp); | 847 | note_new_gpnum(rsp, rdp); |
847 | ret = 1; | 848 | ret = 1; |
848 | } | 849 | } |
849 | local_irq_restore(flags); | 850 | local_irq_restore(flags); |
850 | return ret; | 851 | return ret; |
851 | } | 852 | } |
852 | 853 | ||
853 | /* | 854 | /* |
854 | * Advance this CPU's callbacks, but only if the current grace period | 855 | * Advance this CPU's callbacks, but only if the current grace period |
855 | * has ended. This may be called only from the CPU to whom the rdp | 856 | * has ended. This may be called only from the CPU to whom the rdp |
856 | * belongs. In addition, the corresponding leaf rcu_node structure's | 857 | * belongs. In addition, the corresponding leaf rcu_node structure's |
857 | * ->lock must be held by the caller, with irqs disabled. | 858 | * ->lock must be held by the caller, with irqs disabled. |
858 | */ | 859 | */ |
859 | static void | 860 | static void |
860 | __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) | 861 | __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) |
861 | { | 862 | { |
862 | /* Did another grace period end? */ | 863 | /* Did another grace period end? */ |
863 | if (rdp->completed != rnp->completed) { | 864 | if (rdp->completed != rnp->completed) { |
864 | 865 | ||
865 | /* Advance callbacks. No harm if list empty. */ | 866 | /* Advance callbacks. No harm if list empty. */ |
866 | rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; | 867 | rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; |
867 | rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; | 868 | rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; |
868 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 869 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
869 | 870 | ||
870 | /* Remember that we saw this grace-period completion. */ | 871 | /* Remember that we saw this grace-period completion. */ |
871 | rdp->completed = rnp->completed; | 872 | rdp->completed = rnp->completed; |
872 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); | 873 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); |
873 | 874 | ||
874 | /* | 875 | /* |
875 | * If we were in an extended quiescent state, we may have | 876 | * If we were in an extended quiescent state, we may have |
876 | * missed some grace periods that other CPUs handled on | 877 | * missed some grace periods that other CPUs handled on |
877 | * our behalf. Catch up with this state to avoid noting | 878 | * our behalf. Catch up with this state to avoid noting |
878 | * spurious new grace periods. If another grace period | 879 | * spurious new grace periods. If another grace period |
879 | * has started, then rnp->gpnum will have advanced, so | 880 | * has started, then rnp->gpnum will have advanced, so |
880 | * we will detect this later on. | 881 | * we will detect this later on. |
881 | */ | 882 | */ |
882 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) | 883 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) |
883 | rdp->gpnum = rdp->completed; | 884 | rdp->gpnum = rdp->completed; |
884 | 885 | ||
885 | /* | 886 | /* |
886 | * If RCU does not need a quiescent state from this CPU, | 887 | * If RCU does not need a quiescent state from this CPU, |
887 | * then make sure that this CPU doesn't go looking for one. | 888 | * then make sure that this CPU doesn't go looking for one. |
888 | */ | 889 | */ |
889 | if ((rnp->qsmask & rdp->grpmask) == 0) | 890 | if ((rnp->qsmask & rdp->grpmask) == 0) |
890 | rdp->qs_pending = 0; | 891 | rdp->qs_pending = 0; |
891 | } | 892 | } |
892 | } | 893 | } |
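
Continuing the segmented-callback-list sketch from earlier: noticing a completed grace period is just a promotion of the segments, which is exactly what the three ->nxttail[] assignments above do. In the toy model:

	/* Promote each segment when a grace period is seen to have ended. */
	static void toy_advance_cbs(struct toy_cblist *cl)
	{
		cl->tail[TOY_DONE]       = cl->tail[TOY_WAIT];
		cl->tail[TOY_WAIT]       = cl->tail[TOY_NEXT_READY];
		cl->tail[TOY_NEXT_READY] = cl->tail[TOY_NEXT];
	}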
893 | 894 | ||
894 | /* | 895 | /* |
895 | * Advance this CPU's callbacks, but only if the current grace period | 896 | * Advance this CPU's callbacks, but only if the current grace period |
896 | * has ended. This may be called only from the CPU to whom the rdp | 897 | * has ended. This may be called only from the CPU to whom the rdp |
897 | * belongs. | 898 | * belongs. |
898 | */ | 899 | */ |
899 | static void | 900 | static void |
900 | rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) | 901 | rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) |
901 | { | 902 | { |
902 | unsigned long flags; | 903 | unsigned long flags; |
903 | struct rcu_node *rnp; | 904 | struct rcu_node *rnp; |
904 | 905 | ||
905 | local_irq_save(flags); | 906 | local_irq_save(flags); |
906 | rnp = rdp->mynode; | 907 | rnp = rdp->mynode; |
907 | if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ | 908 | if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ |
908 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ | 909 | !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */ |
909 | local_irq_restore(flags); | 910 | local_irq_restore(flags); |
910 | return; | 911 | return; |
911 | } | 912 | } |
912 | __rcu_process_gp_end(rsp, rnp, rdp); | 913 | __rcu_process_gp_end(rsp, rnp, rdp); |
913 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 914 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
914 | } | 915 | } |
915 | 916 | ||
916 | /* | 917 | /* |
917 | * Do per-CPU grace-period initialization for running CPU. The caller | 918 | * Do per-CPU grace-period initialization for running CPU. The caller |
918 | * must hold the lock of the leaf rcu_node structure corresponding to | 919 | * must hold the lock of the leaf rcu_node structure corresponding to |
919 | * this CPU. | 920 | * this CPU. |
920 | */ | 921 | */ |
921 | static void | 922 | static void |
922 | rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) | 923 | rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) |
923 | { | 924 | { |
924 | /* Prior grace period ended, so advance callbacks for current CPU. */ | 925 | /* Prior grace period ended, so advance callbacks for current CPU. */ |
925 | __rcu_process_gp_end(rsp, rnp, rdp); | 926 | __rcu_process_gp_end(rsp, rnp, rdp); |
926 | 927 | ||
927 | /* | 928 | /* |
928 | * Because this CPU just now started the new grace period, we know | 929 | * Because this CPU just now started the new grace period, we know |
929 | * that all of its callbacks will be covered by this upcoming grace | 930 | * that all of its callbacks will be covered by this upcoming grace |
930 | * period, even the ones that were registered arbitrarily recently. | 931 | * period, even the ones that were registered arbitrarily recently. |
931 | * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. | 932 | * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. |
932 | * | 933 | * |
933 | * Other CPUs cannot be sure exactly when the grace period started. | 934 | * Other CPUs cannot be sure exactly when the grace period started. |
934 | * Therefore, their recently registered callbacks must pass through | 935 | * Therefore, their recently registered callbacks must pass through |
935 | * an additional RCU_NEXT_READY stage, so that they will be handled | 936 | * an additional RCU_NEXT_READY stage, so that they will be handled |
936 | * by the next RCU grace period. | 937 | * by the next RCU grace period. |
937 | */ | 938 | */ |
938 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 939 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
939 | rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 940 | rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
940 | 941 | ||
941 | /* Set state so that this CPU will detect the next quiescent state. */ | 942 | /* Set state so that this CPU will detect the next quiescent state. */ |
942 | __note_new_gpnum(rsp, rnp, rdp); | 943 | __note_new_gpnum(rsp, rnp, rdp); |
943 | } | 944 | } |
944 | 945 | ||
945 | /* | 946 | /* |
946 | * Start a new RCU grace period if warranted, re-initializing the hierarchy | 947 | * Start a new RCU grace period if warranted, re-initializing the hierarchy |
947 | * in preparation for detecting the next grace period. The caller must hold | 948 | * in preparation for detecting the next grace period. The caller must hold |
948 | * the root node's ->lock, which is released before return. Hard irqs must | 949 | * the root node's ->lock, which is released before return. Hard irqs must |
949 | * be disabled. | 950 | * be disabled. |
950 | */ | 951 | */ |
951 | static void | 952 | static void |
952 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | 953 | rcu_start_gp(struct rcu_state *rsp, unsigned long flags) |
953 | __releases(rcu_get_root(rsp)->lock) | 954 | __releases(rcu_get_root(rsp)->lock) |
954 | { | 955 | { |
955 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 956 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
956 | struct rcu_node *rnp = rcu_get_root(rsp); | 957 | struct rcu_node *rnp = rcu_get_root(rsp); |
957 | 958 | ||
958 | if (!rcu_scheduler_fully_active || | 959 | if (!rcu_scheduler_fully_active || |
959 | !cpu_needs_another_gp(rsp, rdp)) { | 960 | !cpu_needs_another_gp(rsp, rdp)) { |
960 | /* | 961 | /* |
961 | * Either the scheduler hasn't yet spawned the first | 962 | * Either the scheduler hasn't yet spawned the first |
962 | * non-idle task or this CPU does not need another | 963 | * non-idle task or this CPU does not need another |
963 | * grace period. Either way, don't start a new grace | 964 | * grace period. Either way, don't start a new grace |
964 | * period. | 965 | * period. |
965 | */ | 966 | */ |
966 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 967 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
967 | return; | 968 | return; |
968 | } | 969 | } |
969 | 970 | ||
970 | if (rsp->fqs_active) { | 971 | if (rsp->fqs_active) { |
971 | /* | 972 | /* |
972 | * This CPU needs a grace period, but force_quiescent_state() | 973 | * This CPU needs a grace period, but force_quiescent_state() |
973 | * is running. Tell it to start one on this CPU's behalf. | 974 | * is running. Tell it to start one on this CPU's behalf. |
974 | */ | 975 | */ |
975 | rsp->fqs_need_gp = 1; | 976 | rsp->fqs_need_gp = 1; |
976 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 977 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
977 | return; | 978 | return; |
978 | } | 979 | } |
979 | 980 | ||
980 | /* Advance to a new grace period and initialize state. */ | 981 | /* Advance to a new grace period and initialize state. */ |
981 | rsp->gpnum++; | 982 | rsp->gpnum++; |
982 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); | 983 | trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); |
983 | WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); | 984 | WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT); |
984 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 985 | rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
985 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 986 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
986 | record_gp_stall_check_time(rsp); | 987 | record_gp_stall_check_time(rsp); |
987 | 988 | ||
988 | /* Special-case the common single-level case. */ | 989 | /* Special-case the common single-level case. */ |
989 | if (NUM_RCU_NODES == 1) { | 990 | if (NUM_RCU_NODES == 1) { |
990 | rcu_preempt_check_blocked_tasks(rnp); | 991 | rcu_preempt_check_blocked_tasks(rnp); |
991 | rnp->qsmask = rnp->qsmaskinit; | 992 | rnp->qsmask = rnp->qsmaskinit; |
992 | rnp->gpnum = rsp->gpnum; | 993 | rnp->gpnum = rsp->gpnum; |
993 | rnp->completed = rsp->completed; | 994 | rnp->completed = rsp->completed; |
994 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ | 995 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */ |
995 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 996 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
996 | rcu_preempt_boost_start_gp(rnp); | 997 | rcu_preempt_boost_start_gp(rnp); |
997 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 998 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
998 | rnp->level, rnp->grplo, | 999 | rnp->level, rnp->grplo, |
999 | rnp->grphi, rnp->qsmask); | 1000 | rnp->grphi, rnp->qsmask); |
1000 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1001 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1001 | return; | 1002 | return; |
1002 | } | 1003 | } |
1003 | 1004 | ||
1004 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ | 1005 | raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ |
1005 | 1006 | ||
1006 | 1007 | ||
1007 | /* Exclude any concurrent CPU-hotplug operations. */ | 1008 | /* Exclude any concurrent CPU-hotplug operations. */ |
1008 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1009 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ |
1009 | 1010 | ||
1010 | /* | 1011 | /* |
1011 | * Set the quiescent-state-needed bits in all the rcu_node | 1012 | * Set the quiescent-state-needed bits in all the rcu_node |
1012 | * structures for all currently online CPUs in breadth-first | 1013 | * structures for all currently online CPUs in breadth-first |
1013 | * order, starting from the root rcu_node structure. This | 1014 | * order, starting from the root rcu_node structure. This |
1014 | * operation relies on the layout of the hierarchy within the | 1015 | * operation relies on the layout of the hierarchy within the |
1015 | * rsp->node[] array. Note that other CPUs will access only | 1016 | * rsp->node[] array. Note that other CPUs will access only |
1016 | * the leaves of the hierarchy, which still indicate that no | 1017 | * the leaves of the hierarchy, which still indicate that no |
1017 | * grace period is in progress, at least until the corresponding | 1018 | * grace period is in progress, at least until the corresponding |
1018 | * leaf node has been initialized. In addition, we have excluded | 1019 | * leaf node has been initialized. In addition, we have excluded |
1019 | * CPU-hotplug operations. | 1020 | * CPU-hotplug operations. |
1020 | * | 1021 | * |
1021 | * Note that the grace period cannot complete until we finish | 1022 | * Note that the grace period cannot complete until we finish |
1022 | * the initialization process, as there will be at least one | 1023 | * the initialization process, as there will be at least one |
1023 | * qsmask bit set in the root node until that time, namely the | 1024 | * qsmask bit set in the root node until that time, namely the |
1024 | * one corresponding to this CPU, due to the fact that we have | 1025 | * one corresponding to this CPU, due to the fact that we have |
1025 | * irqs disabled. | 1026 | * irqs disabled. |
1026 | */ | 1027 | */ |
1027 | rcu_for_each_node_breadth_first(rsp, rnp) { | 1028 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1028 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1029 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
1029 | rcu_preempt_check_blocked_tasks(rnp); | 1030 | rcu_preempt_check_blocked_tasks(rnp); |
1030 | rnp->qsmask = rnp->qsmaskinit; | 1031 | rnp->qsmask = rnp->qsmaskinit; |
1031 | rnp->gpnum = rsp->gpnum; | 1032 | rnp->gpnum = rsp->gpnum; |
1032 | rnp->completed = rsp->completed; | 1033 | rnp->completed = rsp->completed; |
1033 | if (rnp == rdp->mynode) | 1034 | if (rnp == rdp->mynode) |
1034 | rcu_start_gp_per_cpu(rsp, rnp, rdp); | 1035 | rcu_start_gp_per_cpu(rsp, rnp, rdp); |
1035 | rcu_preempt_boost_start_gp(rnp); | 1036 | rcu_preempt_boost_start_gp(rnp); |
1036 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, | 1037 | trace_rcu_grace_period_init(rsp->name, rnp->gpnum, |
1037 | rnp->level, rnp->grplo, | 1038 | rnp->level, rnp->grplo, |
1038 | rnp->grphi, rnp->qsmask); | 1039 | rnp->grphi, rnp->qsmask); |
1039 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1040 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1040 | } | 1041 | } |
1041 | 1042 | ||
1042 | rnp = rcu_get_root(rsp); | 1043 | rnp = rcu_get_root(rsp); |
1043 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1044 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
1044 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ | 1045 | rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ |
1045 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1046 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1046 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 1047 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
1047 | } | 1048 | } |
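The breadth-first loop in rcu_start_gp() works only because the rcu_node hierarchy is laid out level by level in a single array, so a plain index walk visits the root before any leaf that other CPUs might inspect. The user-space sketch below (not the kernel code; the two-level shape, node count, and field names are made up for illustration) shows why a level-ordered array makes a linear scan equivalent to a breadth-first traversal.

#include <stdio.h>

#define NLEAVES 4
#define NNODES  (1 + NLEAVES)           /* one root plus NLEAVES leaf nodes */

struct node {
        int level;                      /* 0 for the root, 1 for the leaves */
        unsigned long qsmask;           /* children (or CPUs) still to report */
};

int main(void)
{
        struct node nodes[NNODES];
        int i;

        /* Level 0 (the root) is nodes[0]; level 1 occupies nodes[1..NLEAVES]. */
        nodes[0].level = 0;
        nodes[0].qsmask = (1UL << NLEAVES) - 1;         /* one bit per child */
        for (i = 1; i < NNODES; i++) {
                nodes[i].level = 1;
                nodes[i].qsmask = 0xfUL;                /* pretend: four CPUs per leaf */
        }

        /*
         * Because each level is contiguous and lower levels come first,
         * a simple index walk is a breadth-first traversal: the root is
         * set up before any leaf that other CPUs might look at.
         */
        for (i = 0; i < NNODES; i++)
                printf("visit node %d at level %d, qsmask %#lx\n",
                       i, nodes[i].level, nodes[i].qsmask);
        return 0;
}

The root-before-leaves ordering is what lets other CPUs keep reading only the leaves while initialization is still in flight, as the block comment above describes.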
1048 | 1049 | ||
1049 | /* | 1050 | /* |
1050 | * Report a full set of quiescent states to the specified rcu_state | 1051 | * Report a full set of quiescent states to the specified rcu_state |
1051 | * data structure. This involves cleaning up after the prior grace | 1052 | * data structure. This involves cleaning up after the prior grace |
1052 | * period and letting rcu_start_gp() start up the next grace period | 1053 | * period and letting rcu_start_gp() start up the next grace period |
1053 | * if one is needed. Note that the caller must hold rnp->lock, as | 1054 | * if one is needed. Note that the caller must hold rnp->lock, as |
1054 | * required by rcu_start_gp(), which will release it. | 1055 | * required by rcu_start_gp(), which will release it. |
1055 | */ | 1056 | */ |
1056 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | 1057 | static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) |
1057 | __releases(rcu_get_root(rsp)->lock) | 1058 | __releases(rcu_get_root(rsp)->lock) |
1058 | { | 1059 | { |
1059 | unsigned long gp_duration; | 1060 | unsigned long gp_duration; |
1060 | struct rcu_node *rnp = rcu_get_root(rsp); | 1061 | struct rcu_node *rnp = rcu_get_root(rsp); |
1061 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1062 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
1062 | 1063 | ||
1063 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | 1064 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); |
1064 | 1065 | ||
1065 | /* | 1066 | /* |
1066 | * Ensure that all grace-period and pre-grace-period activity | 1067 | * Ensure that all grace-period and pre-grace-period activity |
1067 | * is seen before the assignment to rsp->completed. | 1068 | * is seen before the assignment to rsp->completed. |
1068 | */ | 1069 | */ |
1069 | smp_mb(); /* See above block comment. */ | 1070 | smp_mb(); /* See above block comment. */ |
1070 | gp_duration = jiffies - rsp->gp_start; | 1071 | gp_duration = jiffies - rsp->gp_start; |
1071 | if (gp_duration > rsp->gp_max) | 1072 | if (gp_duration > rsp->gp_max) |
1072 | rsp->gp_max = gp_duration; | 1073 | rsp->gp_max = gp_duration; |
1073 | 1074 | ||
1074 | /* | 1075 | /* |
1075 | * We know the grace period is complete, but to everyone else | 1076 | * We know the grace period is complete, but to everyone else |
1076 | * it appears to still be ongoing. But it is also the case | 1077 | * it appears to still be ongoing. But it is also the case |
1077 | * that to everyone else it looks like there is nothing that | 1078 | * that to everyone else it looks like there is nothing that |
1078 | * they can do to advance the grace period. It is therefore | 1079 | * they can do to advance the grace period. It is therefore |
1079 | * safe for us to drop the lock in order to mark the grace | 1080 | * safe for us to drop the lock in order to mark the grace |
1080 | * period as completed in all of the rcu_node structures. | 1081 | * period as completed in all of the rcu_node structures. |
1081 | * | 1082 | * |
1082 | * But if this CPU needs another grace period, it will take | 1083 | * But if this CPU needs another grace period, it will take |
1083 | * care of this while initializing the next grace period. | 1084 | * care of this while initializing the next grace period. |
1084 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL | 1085 | * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL |
1085 | * because the callbacks have not yet been advanced: Those | 1086 | * because the callbacks have not yet been advanced: Those |
1086 | * callbacks are waiting on the grace period that just now | 1087 | * callbacks are waiting on the grace period that just now |
1087 | * completed. | 1088 | * completed. |
1088 | */ | 1089 | */ |
1089 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { | 1090 | if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { |
1090 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1091 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1091 | 1092 | ||
1092 | /* | 1093 | /* |
1093 | * Propagate new ->completed value to rcu_node structures | 1094 | * Propagate new ->completed value to rcu_node structures |
1094 | * so that other CPUs don't have to wait until the start | 1095 | * so that other CPUs don't have to wait until the start |
1095 | * of the next grace period to process their callbacks. | 1096 | * of the next grace period to process their callbacks. |
1096 | */ | 1097 | */ |
1097 | rcu_for_each_node_breadth_first(rsp, rnp) { | 1098 | rcu_for_each_node_breadth_first(rsp, rnp) { |
1098 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1099 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
1099 | rnp->completed = rsp->gpnum; | 1100 | rnp->completed = rsp->gpnum; |
1100 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1101 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1101 | } | 1102 | } |
1102 | rnp = rcu_get_root(rsp); | 1103 | rnp = rcu_get_root(rsp); |
1103 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1104 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
1104 | } | 1105 | } |
1105 | 1106 | ||
1106 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ | 1107 | rsp->completed = rsp->gpnum; /* Declare the grace period complete. */ |
1107 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); | 1108 | trace_rcu_grace_period(rsp->name, rsp->completed, "end"); |
1108 | rsp->fqs_state = RCU_GP_IDLE; | 1109 | rsp->fqs_state = RCU_GP_IDLE; |
1109 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ | 1110 | rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ |
1110 | } | 1111 | } |
1111 | 1112 | ||
1112 | /* | 1113 | /* |
1113 | * Similar to rcu_report_qs_rdp(), for which it is a helper function. | 1114 | * Similar to rcu_report_qs_rdp(), for which it is a helper function. |
1114 | * Allows quiescent states for a group of CPUs to be reported at one go | 1115 | * Allows quiescent states for a group of CPUs to be reported at one go |
1115 | * to the specified rcu_node structure, though all the CPUs in the group | 1116 | * to the specified rcu_node structure, though all the CPUs in the group |
1116 | * must be represented by the same rcu_node structure (which need not be | 1117 | * must be represented by the same rcu_node structure (which need not be |
1117 | * a leaf rcu_node structure, though it often will be). That structure's | 1118 | * a leaf rcu_node structure, though it often will be). That structure's |
1118 | * lock must be held upon entry, and it is released before return. | 1119 | * lock must be held upon entry, and it is released before return. |
1119 | */ | 1120 | */ |
1120 | static void | 1121 | static void |
1121 | rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, | 1122 | rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, |
1122 | struct rcu_node *rnp, unsigned long flags) | 1123 | struct rcu_node *rnp, unsigned long flags) |
1123 | __releases(rnp->lock) | 1124 | __releases(rnp->lock) |
1124 | { | 1125 | { |
1125 | struct rcu_node *rnp_c; | 1126 | struct rcu_node *rnp_c; |
1126 | 1127 | ||
1127 | /* Walk up the rcu_node hierarchy. */ | 1128 | /* Walk up the rcu_node hierarchy. */ |
1128 | for (;;) { | 1129 | for (;;) { |
1129 | if (!(rnp->qsmask & mask)) { | 1130 | if (!(rnp->qsmask & mask)) { |
1130 | 1131 | ||
1131 | /* Our bit has already been cleared, so done. */ | 1132 | /* Our bit has already been cleared, so done. */ |
1132 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1133 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1133 | return; | 1134 | return; |
1134 | } | 1135 | } |
1135 | rnp->qsmask &= ~mask; | 1136 | rnp->qsmask &= ~mask; |
1136 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, | 1137 | trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, |
1137 | mask, rnp->qsmask, rnp->level, | 1138 | mask, rnp->qsmask, rnp->level, |
1138 | rnp->grplo, rnp->grphi, | 1139 | rnp->grplo, rnp->grphi, |
1139 | !!rnp->gp_tasks); | 1140 | !!rnp->gp_tasks); |
1140 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 1141 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
1141 | 1142 | ||
1142 | /* Other bits still set at this level, so done. */ | 1143 | /* Other bits still set at this level, so done. */ |
1143 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1144 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1144 | return; | 1145 | return; |
1145 | } | 1146 | } |
1146 | mask = rnp->grpmask; | 1147 | mask = rnp->grpmask; |
1147 | if (rnp->parent == NULL) { | 1148 | if (rnp->parent == NULL) { |
1148 | 1149 | ||
1149 | /* No more levels. Exit loop holding root lock. */ | 1150 | /* No more levels. Exit loop holding root lock. */ |
1150 | 1151 | ||
1151 | break; | 1152 | break; |
1152 | } | 1153 | } |
1153 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1154 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1154 | rnp_c = rnp; | 1155 | rnp_c = rnp; |
1155 | rnp = rnp->parent; | 1156 | rnp = rnp->parent; |
1156 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1157 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1157 | WARN_ON_ONCE(rnp_c->qsmask); | 1158 | WARN_ON_ONCE(rnp_c->qsmask); |
1158 | } | 1159 | } |
1159 | 1160 | ||
1160 | /* | 1161 | /* |
1161 | * Get here if we are the last CPU to pass through a quiescent | 1162 | * Get here if we are the last CPU to pass through a quiescent |
1162 | * state for this grace period. Invoke rcu_report_qs_rsp() | 1163 | * state for this grace period. Invoke rcu_report_qs_rsp() |
1163 | * to clean up and start the next grace period if one is needed. | 1164 | * to clean up and start the next grace period if one is needed. |
1164 | */ | 1165 | */ |
1165 | rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ | 1166 | rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ |
1166 | } | 1167 | } |
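rcu_report_qs_rnp() clears the reporting CPU's bit at its leaf and climbs toward the root only while each level becomes completely quiet. Below is a minimal user-space sketch of that upward propagation, assuming a hypothetical two-level tree of masks and omitting the real locking, tracing, and preempt-RCU blocked-reader checks.

#include <stdio.h>

struct qnode {
        struct qnode *parent;
        unsigned long grpmask;          /* this node's bit in its parent's qsmask */
        unsigned long qsmask;           /* children (or CPUs) still owing a report */
};

/* Clear @mask in @np, and keep climbing while a level becomes empty. */
static void report_qs(struct qnode *np, unsigned long mask)
{
        for (;;) {
                if (!(np->qsmask & mask))
                        return;                 /* already cleared, nothing to do */
                np->qsmask &= ~mask;
                if (np->qsmask != 0)
                        return;                 /* siblings still pending at this level */
                if (np->parent == NULL) {
                        printf("grace period may now end\n");
                        return;
                }
                mask = np->grpmask;             /* our bit in the parent's mask */
                np = np->parent;
        }
}

int main(void)
{
        struct qnode root  = { NULL,  0,   0x3 };       /* two leaf children */
        struct qnode leaf0 = { &root, 0x1, 0x3 };       /* two CPUs */
        struct qnode leaf1 = { &root, 0x2, 0x1 };       /* one CPU */

        report_qs(&leaf0, 0x1); /* CPU 0: leaf0 still has CPU 1 pending */
        report_qs(&leaf1, 0x1); /* last CPU of leaf1: clears root bit 0x2 */
        report_qs(&leaf0, 0x2); /* last CPU of leaf0: root empties, GP can end */
        return 0;
}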
1167 | 1168 | ||
1168 | /* | 1169 | /* |
1169 | * Record a quiescent state for the specified CPU to that CPU's rcu_data | 1170 | * Record a quiescent state for the specified CPU to that CPU's rcu_data |
1170 | * structure. This must be either called from the specified CPU, or | 1171 | * structure. This must be either called from the specified CPU, or |
1171 | * called when the specified CPU is known to be offline (and when it is | 1172 | * called when the specified CPU is known to be offline (and when it is |
1172 | * also known that no other CPU is concurrently trying to help the offline | 1173 | * also known that no other CPU is concurrently trying to help the offline |
1173 | * CPU). The lastcomp argument is used to make sure we are still in the | 1174 | * CPU). The lastcomp argument is used to make sure we are still in the |
1174 | * grace period of interest. We don't want to end the current grace period | 1175 | * grace period of interest. We don't want to end the current grace period |
1175 | * based on quiescent states detected in an earlier grace period! | 1176 | * based on quiescent states detected in an earlier grace period! |
1176 | */ | 1177 | */ |
1177 | static void | 1178 | static void |
1178 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) | 1179 | rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) |
1179 | { | 1180 | { |
1180 | unsigned long flags; | 1181 | unsigned long flags; |
1181 | unsigned long mask; | 1182 | unsigned long mask; |
1182 | struct rcu_node *rnp; | 1183 | struct rcu_node *rnp; |
1183 | 1184 | ||
1184 | rnp = rdp->mynode; | 1185 | rnp = rdp->mynode; |
1185 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1186 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1186 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { | 1187 | if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { |
1187 | 1188 | ||
1188 | /* | 1189 | /* |
1189 | * The grace period in which this quiescent state was | 1190 | * The grace period in which this quiescent state was |
1190 | * recorded has ended, so don't report it upwards. | 1191 | * recorded has ended, so don't report it upwards. |
1191 | * We will instead need a new quiescent state that lies | 1192 | * We will instead need a new quiescent state that lies |
1192 | * within the current grace period. | 1193 | * within the current grace period. |
1193 | */ | 1194 | */ |
1194 | rdp->passed_quiesce = 0; /* need qs for new gp. */ | 1195 | rdp->passed_quiesce = 0; /* need qs for new gp. */ |
1195 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1196 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1196 | return; | 1197 | return; |
1197 | } | 1198 | } |
1198 | mask = rdp->grpmask; | 1199 | mask = rdp->grpmask; |
1199 | if ((rnp->qsmask & mask) == 0) { | 1200 | if ((rnp->qsmask & mask) == 0) { |
1200 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1201 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1201 | } else { | 1202 | } else { |
1202 | rdp->qs_pending = 0; | 1203 | rdp->qs_pending = 0; |
1203 | 1204 | ||
1204 | /* | 1205 | /* |
1205 | * This GP can't end until cpu checks in, so all of our | 1206 | * This GP can't end until cpu checks in, so all of our |
1206 | * callbacks can be processed during the next GP. | 1207 | * callbacks can be processed during the next GP. |
1207 | */ | 1208 | */ |
1208 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 1209 | rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
1209 | 1210 | ||
1210 | rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ | 1211 | rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ |
1211 | } | 1212 | } |
1212 | } | 1213 | } |
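The lastgp test in rcu_report_qs_rdp() discards quiescent states that were recorded against an earlier grace period, so a stale report can never end the grace period that is currently running. A toy illustration of that rule, with bare counters standing in for ->gpnum and ->completed:

#include <stdio.h>

/* Apply a quiescent state only if it belongs to the still-running GP. */
static int apply_qs(unsigned long qs_gpnum, unsigned long cur_gpnum,
                    unsigned long cur_completed)
{
        if (qs_gpnum != cur_gpnum || cur_completed == cur_gpnum) {
                /* Recorded against an old (or already finished) grace period. */
                return 0;
        }
        return 1;       /* counts toward the current grace period */
}

int main(void)
{
        /* Grace period #5 is running (completed is still 4). */
        printf("qs from gp 5: %s\n", apply_qs(5, 5, 4) ? "counted" : "discarded");
        /* A quiescent state left over from GP #4 must not end GP #5. */
        printf("qs from gp 4: %s\n", apply_qs(4, 5, 4) ? "counted" : "discarded");
        return 0;
}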
1213 | 1214 | ||
1214 | /* | 1215 | /* |
1215 | * Check to see if there is a new grace period of which this CPU | 1216 | * Check to see if there is a new grace period of which this CPU |
1216 | * is not yet aware, and if so, set up local rcu_data state for it. | 1217 | * is not yet aware, and if so, set up local rcu_data state for it. |
1217 | * Otherwise, see if this CPU has just passed through its first | 1218 | * Otherwise, see if this CPU has just passed through its first |
1218 | * quiescent state for this grace period, and record that fact if so. | 1219 | * quiescent state for this grace period, and record that fact if so. |
1219 | */ | 1220 | */ |
1220 | static void | 1221 | static void |
1221 | rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | 1222 | rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) |
1222 | { | 1223 | { |
1223 | /* If there is now a new grace period, record and return. */ | 1224 | /* If there is now a new grace period, record and return. */ |
1224 | if (check_for_new_grace_period(rsp, rdp)) | 1225 | if (check_for_new_grace_period(rsp, rdp)) |
1225 | return; | 1226 | return; |
1226 | 1227 | ||
1227 | /* | 1228 | /* |
1228 | * Does this CPU still need to do its part for current grace period? | 1229 | * Does this CPU still need to do its part for current grace period? |
1229 | * If no, return and let the other CPUs do their part as well. | 1230 | * If no, return and let the other CPUs do their part as well. |
1230 | */ | 1231 | */ |
1231 | if (!rdp->qs_pending) | 1232 | if (!rdp->qs_pending) |
1232 | return; | 1233 | return; |
1233 | 1234 | ||
1234 | /* | 1235 | /* |
1235 | * Was there a quiescent state since the beginning of the grace | 1236 | * Was there a quiescent state since the beginning of the grace |
1236 | * period? If no, then exit and wait for the next call. | 1237 | * period? If no, then exit and wait for the next call. |
1237 | */ | 1238 | */ |
1238 | if (!rdp->passed_quiesce) | 1239 | if (!rdp->passed_quiesce) |
1239 | return; | 1240 | return; |
1240 | 1241 | ||
1241 | /* | 1242 | /* |
1242 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the | 1243 | * Tell RCU we are done (but rcu_report_qs_rdp() will be the |
1243 | * judge of that). | 1244 | * judge of that). |
1244 | */ | 1245 | */ |
1245 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); | 1246 | rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); |
1246 | } | 1247 | } |
1247 | 1248 | ||
1248 | #ifdef CONFIG_HOTPLUG_CPU | 1249 | #ifdef CONFIG_HOTPLUG_CPU |
1249 | 1250 | ||
1250 | /* | 1251 | /* |
1251 | * Move a dying CPU's RCU callbacks to online CPU's callback list. | 1252 | * Move a dying CPU's RCU callbacks to online CPU's callback list. |
1252 | * Synchronization is not required because this function executes | 1253 | * Synchronization is not required because this function executes |
1253 | * in stop_machine() context. | 1254 | * in stop_machine() context. |
1254 | */ | 1255 | */ |
1255 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1256 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
1256 | { | 1257 | { |
1257 | int i; | 1258 | int i; |
1258 | /* current DYING CPU is cleared in the cpu_online_mask */ | 1259 | /* current DYING CPU is cleared in the cpu_online_mask */ |
1259 | int receive_cpu = cpumask_any(cpu_online_mask); | 1260 | int receive_cpu = cpumask_any(cpu_online_mask); |
1260 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1261 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
1261 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | 1262 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); |
1262 | 1263 | ||
1263 | if (rdp->nxtlist == NULL) | 1264 | if (rdp->nxtlist == NULL) |
1264 | return; /* irqs disabled, so comparison is stable. */ | 1265 | return; /* irqs disabled, so comparison is stable. */ |
1265 | 1266 | ||
1266 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; | 1267 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; |
1267 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; | 1268 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
1268 | receive_rdp->qlen += rdp->qlen; | 1269 | receive_rdp->qlen += rdp->qlen; |
1269 | receive_rdp->n_cbs_adopted += rdp->qlen; | 1270 | receive_rdp->n_cbs_adopted += rdp->qlen; |
1270 | rdp->n_cbs_orphaned += rdp->qlen; | 1271 | rdp->n_cbs_orphaned += rdp->qlen; |
1271 | 1272 | ||
1272 | rdp->nxtlist = NULL; | 1273 | rdp->nxtlist = NULL; |
1273 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1274 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1274 | rdp->nxttail[i] = &rdp->nxtlist; | 1275 | rdp->nxttail[i] = &rdp->nxtlist; |
1275 | rdp->qlen = 0; | 1276 | rdp->qlen = 0; |
1276 | } | 1277 | } |
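rcu_send_cbs_to_online() can move an arbitrarily long callback list in constant time because each list tracks a pointer to its final ->next field. The stand-alone sketch below shows that pointer-to-pointer splice on plain singly linked lists, leaving out the kernel's segmented nxttail[] bookkeeping and the qlen accounting.

#include <stdio.h>

struct cb {
        struct cb *next;
        const char *name;
};

struct cblist {
        struct cb *head;
        struct cb **tail;       /* points at head, or at the last element's ->next */
};

static void cblist_init(struct cblist *l)
{
        l->head = NULL;
        l->tail = &l->head;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;           /* link after the current last element (or head) */
        l->tail = &c->next;
}

/* Move everything from @src onto the end of @dst in O(1). */
static void cblist_splice(struct cblist *dst, struct cblist *src)
{
        if (src->head == NULL)
                return;
        *dst->tail = src->head;
        dst->tail = src->tail;
        cblist_init(src);
}

int main(void)
{
        struct cblist a, b;
        struct cb c1 = { NULL, "c1" }, c2 = { NULL, "c2" }, c3 = { NULL, "c3" };
        struct cb *p;

        cblist_init(&a);
        cblist_init(&b);
        cblist_enqueue(&a, &c1);
        cblist_enqueue(&b, &c2);
        cblist_enqueue(&b, &c3);
        cblist_splice(&a, &b);          /* a now holds c1, c2, c3; b is empty */

        for (p = a.head; p != NULL; p = p->next)
                printf("%s\n", p->name);
        return 0;
}

The constant-time splice matters here because the real function runs in stop_machine() context, where walking a long callback list would be unwelcome.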
1277 | 1278 | ||
1278 | /* | 1279 | /* |
1279 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy | 1280 | * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy |
1280 | * and move all callbacks from the outgoing CPU to the current one. | 1281 | * and move all callbacks from the outgoing CPU to the current one. |
1281 | * There can only be one CPU hotplug operation at a time, so no other | 1282 | * There can only be one CPU hotplug operation at a time, so no other |
1282 | * CPU can be attempting to update rcu_cpu_kthread_task. | 1283 | * CPU can be attempting to update rcu_cpu_kthread_task. |
1283 | */ | 1284 | */ |
1284 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | 1285 | static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) |
1285 | { | 1286 | { |
1286 | unsigned long flags; | 1287 | unsigned long flags; |
1287 | unsigned long mask; | 1288 | unsigned long mask; |
1288 | int need_report = 0; | 1289 | int need_report = 0; |
1289 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 1290 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
1290 | struct rcu_node *rnp; | 1291 | struct rcu_node *rnp; |
1291 | 1292 | ||
1292 | rcu_stop_cpu_kthread(cpu); | 1293 | rcu_stop_cpu_kthread(cpu); |
1293 | 1294 | ||
1294 | /* Exclude any attempts to start a new grace period. */ | 1295 | /* Exclude any attempts to start a new grace period. */ |
1295 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1296 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1296 | 1297 | ||
1297 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ | 1298 | /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ |
1298 | rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ | 1299 | rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ |
1299 | mask = rdp->grpmask; /* rnp->grplo is constant. */ | 1300 | mask = rdp->grpmask; /* rnp->grplo is constant. */ |
1300 | do { | 1301 | do { |
1301 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 1302 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
1302 | rnp->qsmaskinit &= ~mask; | 1303 | rnp->qsmaskinit &= ~mask; |
1303 | if (rnp->qsmaskinit != 0) { | 1304 | if (rnp->qsmaskinit != 0) { |
1304 | if (rnp != rdp->mynode) | 1305 | if (rnp != rdp->mynode) |
1305 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1306 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1306 | else | 1307 | else |
1307 | trace_rcu_grace_period(rsp->name, | 1308 | trace_rcu_grace_period(rsp->name, |
1308 | rnp->gpnum + 1 - | 1309 | rnp->gpnum + 1 - |
1309 | !!(rnp->qsmask & mask), | 1310 | !!(rnp->qsmask & mask), |
1310 | "cpuofl"); | 1311 | "cpuofl"); |
1311 | break; | 1312 | break; |
1312 | } | 1313 | } |
1313 | if (rnp == rdp->mynode) { | 1314 | if (rnp == rdp->mynode) { |
1314 | trace_rcu_grace_period(rsp->name, | 1315 | trace_rcu_grace_period(rsp->name, |
1315 | rnp->gpnum + 1 - | 1316 | rnp->gpnum + 1 - |
1316 | !!(rnp->qsmask & mask), | 1317 | !!(rnp->qsmask & mask), |
1317 | "cpuofl"); | 1318 | "cpuofl"); |
1318 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); | 1319 | need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); |
1319 | } else | 1320 | } else |
1320 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1321 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1321 | mask = rnp->grpmask; | 1322 | mask = rnp->grpmask; |
1322 | rnp = rnp->parent; | 1323 | rnp = rnp->parent; |
1323 | } while (rnp != NULL); | 1324 | } while (rnp != NULL); |
1324 | 1325 | ||
1325 | /* | 1326 | /* |
1326 | * We still hold the leaf rcu_node structure lock here, and | 1327 | * We still hold the leaf rcu_node structure lock here, and |
1327 | * irqs are still disabled. The reason for this subterfuge is | 1328 | * irqs are still disabled. The reason for this subterfuge is |
1328 | * because invoking rcu_report_unblock_qs_rnp() with ->onofflock | 1329 | * because invoking rcu_report_unblock_qs_rnp() with ->onofflock |
1329 | * held leads to deadlock. | 1330 | * held leads to deadlock. |
1330 | */ | 1331 | */ |
1331 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | 1332 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ |
1332 | rnp = rdp->mynode; | 1333 | rnp = rdp->mynode; |
1333 | if (need_report & RCU_OFL_TASKS_NORM_GP) | 1334 | if (need_report & RCU_OFL_TASKS_NORM_GP) |
1334 | rcu_report_unblock_qs_rnp(rnp, flags); | 1335 | rcu_report_unblock_qs_rnp(rnp, flags); |
1335 | else | 1336 | else |
1336 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1337 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1337 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1338 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
1338 | rcu_report_exp_rnp(rsp, rnp, true); | 1339 | rcu_report_exp_rnp(rsp, rnp, true); |
1339 | rcu_node_kthread_setaffinity(rnp, -1); | 1340 | rcu_node_kthread_setaffinity(rnp, -1); |
1340 | } | 1341 | } |

1341 | 1342 | ||
1342 | /* | 1343 | /* |
1343 | * Remove the specified CPU from the RCU hierarchy and move any pending | 1344 | * Remove the specified CPU from the RCU hierarchy and move any pending |
1344 | * callbacks that it might have to the current CPU. This code assumes | 1345 | * callbacks that it might have to the current CPU. This code assumes |
1345 | * that at least one CPU in the system will remain running at all times. | 1346 | * that at least one CPU in the system will remain running at all times. |
1346 | * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. | 1347 | * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. |
1347 | */ | 1348 | */ |
1348 | static void rcu_offline_cpu(int cpu) | 1349 | static void rcu_offline_cpu(int cpu) |
1349 | { | 1350 | { |
1350 | __rcu_offline_cpu(cpu, &rcu_sched_state); | 1351 | __rcu_offline_cpu(cpu, &rcu_sched_state); |
1351 | __rcu_offline_cpu(cpu, &rcu_bh_state); | 1352 | __rcu_offline_cpu(cpu, &rcu_bh_state); |
1352 | rcu_preempt_offline_cpu(cpu); | 1353 | rcu_preempt_offline_cpu(cpu); |
1353 | } | 1354 | } |
1354 | 1355 | ||
1355 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1356 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
1356 | 1357 | ||
1357 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) | 1358 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
1358 | { | 1359 | { |
1359 | } | 1360 | } |
1360 | 1361 | ||
1361 | static void rcu_offline_cpu(int cpu) | 1362 | static void rcu_offline_cpu(int cpu) |
1362 | { | 1363 | { |
1363 | } | 1364 | } |
1364 | 1365 | ||
1365 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ | 1366 | #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ |
1366 | 1367 | ||
1367 | /* | 1368 | /* |
1368 | * Invoke any RCU callbacks that have made it to the end of their grace | 1369 | * Invoke any RCU callbacks that have made it to the end of their grace |
1369 | * period. Throttle as specified by rdp->blimit. | 1370 | * period. Throttle as specified by rdp->blimit. |
1370 | */ | 1371 | */ |
1371 | static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | 1372 | static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) |
1372 | { | 1373 | { |
1373 | unsigned long flags; | 1374 | unsigned long flags; |
1374 | struct rcu_head *next, *list, **tail; | 1375 | struct rcu_head *next, *list, **tail; |
1375 | int bl, count; | 1376 | int bl, count; |
1376 | 1377 | ||
1377 | /* If no callbacks are ready, just return. */ | 1378 | /* If no callbacks are ready, just return. */ |
1378 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { | 1379 | if (!cpu_has_callbacks_ready_to_invoke(rdp)) { |
1379 | trace_rcu_batch_start(rsp->name, 0, 0); | 1380 | trace_rcu_batch_start(rsp->name, 0, 0); |
1380 | trace_rcu_batch_end(rsp->name, 0); | 1381 | trace_rcu_batch_end(rsp->name, 0); |
1381 | return; | 1382 | return; |
1382 | } | 1383 | } |
1383 | 1384 | ||
1384 | /* | 1385 | /* |
1385 | * Extract the list of ready callbacks, disabling to prevent | 1386 | * Extract the list of ready callbacks, disabling to prevent |
1386 | * races with call_rcu() from interrupt handlers. | 1387 | * races with call_rcu() from interrupt handlers. |
1387 | */ | 1388 | */ |
1388 | local_irq_save(flags); | 1389 | local_irq_save(flags); |
1389 | bl = rdp->blimit; | 1390 | bl = rdp->blimit; |
1390 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); | 1391 | trace_rcu_batch_start(rsp->name, rdp->qlen, bl); |
1391 | list = rdp->nxtlist; | 1392 | list = rdp->nxtlist; |
1392 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; | 1393 | rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; |
1393 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; | 1394 | *rdp->nxttail[RCU_DONE_TAIL] = NULL; |
1394 | tail = rdp->nxttail[RCU_DONE_TAIL]; | 1395 | tail = rdp->nxttail[RCU_DONE_TAIL]; |
1395 | for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) | 1396 | for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) |
1396 | if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) | 1397 | if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) |
1397 | rdp->nxttail[count] = &rdp->nxtlist; | 1398 | rdp->nxttail[count] = &rdp->nxtlist; |
1398 | local_irq_restore(flags); | 1399 | local_irq_restore(flags); |
1399 | 1400 | ||
1400 | /* Invoke callbacks. */ | 1401 | /* Invoke callbacks. */ |
1401 | count = 0; | 1402 | count = 0; |
1402 | while (list) { | 1403 | while (list) { |
1403 | next = list->next; | 1404 | next = list->next; |
1404 | prefetch(next); | 1405 | prefetch(next); |
1405 | debug_rcu_head_unqueue(list); | 1406 | debug_rcu_head_unqueue(list); |
1406 | __rcu_reclaim(rsp->name, list); | 1407 | __rcu_reclaim(rsp->name, list); |
1407 | list = next; | 1408 | list = next; |
1408 | if (++count >= bl) | 1409 | if (++count >= bl) |
1409 | break; | 1410 | break; |
1410 | } | 1411 | } |
1411 | 1412 | ||
1412 | local_irq_save(flags); | 1413 | local_irq_save(flags); |
1413 | trace_rcu_batch_end(rsp->name, count); | 1414 | trace_rcu_batch_end(rsp->name, count); |
1414 | 1415 | ||
1415 | /* Update count, and requeue any remaining callbacks. */ | 1416 | /* Update count, and requeue any remaining callbacks. */ |
1416 | rdp->qlen -= count; | 1417 | rdp->qlen -= count; |
1417 | rdp->n_cbs_invoked += count; | 1418 | rdp->n_cbs_invoked += count; |
1418 | if (list != NULL) { | 1419 | if (list != NULL) { |
1419 | *tail = rdp->nxtlist; | 1420 | *tail = rdp->nxtlist; |
1420 | rdp->nxtlist = list; | 1421 | rdp->nxtlist = list; |
1421 | for (count = 0; count < RCU_NEXT_SIZE; count++) | 1422 | for (count = 0; count < RCU_NEXT_SIZE; count++) |
1422 | if (&rdp->nxtlist == rdp->nxttail[count]) | 1423 | if (&rdp->nxtlist == rdp->nxttail[count]) |
1423 | rdp->nxttail[count] = tail; | 1424 | rdp->nxttail[count] = tail; |
1424 | else | 1425 | else |
1425 | break; | 1426 | break; |
1426 | } | 1427 | } |
1427 | 1428 | ||
1428 | /* Reinstate batch limit if we have worked down the excess. */ | 1429 | /* Reinstate batch limit if we have worked down the excess. */ |
1429 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1430 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
1430 | rdp->blimit = blimit; | 1431 | rdp->blimit = blimit; |
1431 | 1432 | ||
1432 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | 1433 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ |
1433 | if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { | 1434 | if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { |
1434 | rdp->qlen_last_fqs_check = 0; | 1435 | rdp->qlen_last_fqs_check = 0; |
1435 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1436 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1436 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | 1437 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) |
1437 | rdp->qlen_last_fqs_check = rdp->qlen; | 1438 | rdp->qlen_last_fqs_check = rdp->qlen; |
1438 | 1439 | ||
1439 | local_irq_restore(flags); | 1440 | local_irq_restore(flags); |
1440 | 1441 | ||
1441 | /* Re-invoke RCU core processing if there are callbacks remaining. */ | 1442 | /* Re-invoke RCU core processing if there are callbacks remaining. */ |
1442 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1443 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1443 | invoke_rcu_core(); | 1444 | invoke_rcu_core(); |
1444 | } | 1445 | } |
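rcu_do_batch() detaches only the callbacks whose grace period has ended, invokes at most ->blimit of them, and puts the rest back for a later pass. The sketch below keeps just the batch-limit idea (a flat list and a fixed limit; no interrupt masking, tracing, or softirq re-raise).

#include <stdio.h>

struct cb {
        struct cb *next;
        void (*func)(struct cb *);
};

static void do_work(struct cb *c)
{
        printf("invoked callback %p\n", (void *)c);
}

/*
 * Invoke at most @limit callbacks from *@listp; anything not invoked
 * stays on the list for a later pass (the kernel requeues it and
 * re-raises RCU_SOFTIRQ instead of looping here).
 */
static int do_batch(struct cb **listp, int limit)
{
        struct cb *list = *listp;
        int count = 0;

        while (list != NULL && count < limit) {
                struct cb *next = list->next;

                list->func(list);
                list = next;
                count++;
        }
        *listp = list;          /* remaining callbacks, if any */
        return count;
}

int main(void)
{
        struct cb c3 = { NULL, do_work };
        struct cb c2 = { &c3, do_work };
        struct cb c1 = { &c2, do_work };
        struct cb *list = &c1;

        while (list != NULL)
                printf("batch of %d done\n", do_batch(&list, 2));
        return 0;
}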
1445 | 1446 | ||
1446 | /* | 1447 | /* |
1447 | * Check to see if this CPU is in a non-context-switch quiescent state | 1448 | * Check to see if this CPU is in a non-context-switch quiescent state |
1448 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). | 1449 | * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). |
1449 | * Also schedule RCU core processing. | 1450 | * Also schedule RCU core processing. |
1450 | * | 1451 | * |
1451 | * This function must be called from hardirq context. It is normally | 1452 | * This function must be called from hardirq context. It is normally |
1452 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 1453 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
1453 | * false, there is no point in invoking rcu_check_callbacks(). | 1454 | * false, there is no point in invoking rcu_check_callbacks(). |
1454 | */ | 1455 | */ |
1455 | void rcu_check_callbacks(int cpu, int user) | 1456 | void rcu_check_callbacks(int cpu, int user) |
1456 | { | 1457 | { |
1457 | trace_rcu_utilization("Start scheduler-tick"); | 1458 | trace_rcu_utilization("Start scheduler-tick"); |
1458 | if (user || rcu_is_cpu_rrupt_from_idle()) { | 1459 | if (user || rcu_is_cpu_rrupt_from_idle()) { |
1459 | 1460 | ||
1460 | /* | 1461 | /* |
1461 | * Get here if this CPU took its interrupt from user | 1462 | * Get here if this CPU took its interrupt from user |
1462 | * mode or from the idle loop, and if this is not a | 1463 | * mode or from the idle loop, and if this is not a |
1463 | * nested interrupt. In this case, the CPU is in | 1464 | * nested interrupt. In this case, the CPU is in |
1464 | * a quiescent state, so note it. | 1465 | * a quiescent state, so note it. |
1465 | * | 1466 | * |
1466 | * No memory barrier is required here because both | 1467 | * No memory barrier is required here because both |
1467 | * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local | 1468 | * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local |
1468 | * variables that other CPUs neither access nor modify, | 1469 | * variables that other CPUs neither access nor modify, |
1469 | * at least not while the corresponding CPU is online. | 1470 | * at least not while the corresponding CPU is online. |
1470 | */ | 1471 | */ |
1471 | 1472 | ||
1472 | rcu_sched_qs(cpu); | 1473 | rcu_sched_qs(cpu); |
1473 | rcu_bh_qs(cpu); | 1474 | rcu_bh_qs(cpu); |
1474 | 1475 | ||
1475 | } else if (!in_softirq()) { | 1476 | } else if (!in_softirq()) { |
1476 | 1477 | ||
1477 | /* | 1478 | /* |
1478 | * Get here if this CPU did not take its interrupt from | 1479 | * Get here if this CPU did not take its interrupt from |
1479 | * softirq, in other words, if it is not interrupting | 1480 | * softirq, in other words, if it is not interrupting |
1480 | * a rcu_bh read-side critical section. This is an _bh | 1481 | * a rcu_bh read-side critical section. This is an _bh |
1481 | * critical section, so note it. | 1482 | * critical section, so note it. |
1482 | */ | 1483 | */ |
1483 | 1484 | ||
1484 | rcu_bh_qs(cpu); | 1485 | rcu_bh_qs(cpu); |
1485 | } | 1486 | } |
1486 | rcu_preempt_check_callbacks(cpu); | 1487 | rcu_preempt_check_callbacks(cpu); |
1487 | if (rcu_pending(cpu)) | 1488 | if (rcu_pending(cpu)) |
1488 | invoke_rcu_core(); | 1489 | invoke_rcu_core(); |
1489 | trace_rcu_utilization("End scheduler-tick"); | 1490 | trace_rcu_utilization("End scheduler-tick"); |
1490 | } | 1491 | } |
1491 | 1492 | ||
1492 | #ifdef CONFIG_SMP | 1493 | #ifdef CONFIG_SMP |
1493 | 1494 | ||
1494 | /* | 1495 | /* |
1495 | * Scan the leaf rcu_node structures, processing dyntick state for any that | 1496 | * Scan the leaf rcu_node structures, processing dyntick state for any that |
1496 | * have not yet encountered a quiescent state, using the function specified. | 1497 | * have not yet encountered a quiescent state, using the function specified. |
1497 | * Also initiate boosting for any threads blocked on the root rcu_node. | 1498 | * Also initiate boosting for any threads blocked on the root rcu_node. |
1498 | * | 1499 | * |
1499 | * The caller must have suppressed start of new grace periods. | 1500 | * The caller must have suppressed start of new grace periods. |
1500 | */ | 1501 | */ |
1501 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) | 1502 | static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) |
1502 | { | 1503 | { |
1503 | unsigned long bit; | 1504 | unsigned long bit; |
1504 | int cpu; | 1505 | int cpu; |
1505 | unsigned long flags; | 1506 | unsigned long flags; |
1506 | unsigned long mask; | 1507 | unsigned long mask; |
1507 | struct rcu_node *rnp; | 1508 | struct rcu_node *rnp; |
1508 | 1509 | ||
1509 | rcu_for_each_leaf_node(rsp, rnp) { | 1510 | rcu_for_each_leaf_node(rsp, rnp) { |
1510 | mask = 0; | 1511 | mask = 0; |
1511 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1512 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1512 | if (!rcu_gp_in_progress(rsp)) { | 1513 | if (!rcu_gp_in_progress(rsp)) { |
1513 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1514 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1514 | return; | 1515 | return; |
1515 | } | 1516 | } |
1516 | if (rnp->qsmask == 0) { | 1517 | if (rnp->qsmask == 0) { |
1517 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | 1518 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ |
1518 | continue; | 1519 | continue; |
1519 | } | 1520 | } |
1520 | cpu = rnp->grplo; | 1521 | cpu = rnp->grplo; |
1521 | bit = 1; | 1522 | bit = 1; |
1522 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 1523 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
1523 | if ((rnp->qsmask & bit) != 0 && | 1524 | if ((rnp->qsmask & bit) != 0 && |
1524 | f(per_cpu_ptr(rsp->rda, cpu))) | 1525 | f(per_cpu_ptr(rsp->rda, cpu))) |
1525 | mask |= bit; | 1526 | mask |= bit; |
1526 | } | 1527 | } |
1527 | if (mask != 0) { | 1528 | if (mask != 0) { |
1528 | 1529 | ||
1529 | /* rcu_report_qs_rnp() releases rnp->lock. */ | 1530 | /* rcu_report_qs_rnp() releases rnp->lock. */ |
1530 | rcu_report_qs_rnp(mask, rsp, rnp, flags); | 1531 | rcu_report_qs_rnp(mask, rsp, rnp, flags); |
1531 | continue; | 1532 | continue; |
1532 | } | 1533 | } |
1533 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1534 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1534 | } | 1535 | } |
1535 | rnp = rcu_get_root(rsp); | 1536 | rnp = rcu_get_root(rsp); |
1536 | if (rnp->qsmask == 0) { | 1537 | if (rnp->qsmask == 0) { |
1537 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1538 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1538 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | 1539 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ |
1539 | } | 1540 | } |
1540 | } | 1541 | } |
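force_qs_rnp() walks each leaf node, tests a per-CPU predicate for every bit still set in ->qsmask, and reports all newly quiet CPUs in a single call. A reduced sketch of just the bit loop, with a stand-in predicate in place of dyntick_save_progress_counter()/rcu_implicit_dynticks_qs() and no locking or boosting:

#include <stdio.h>

/* Stand-in predicate: pretend the even-numbered CPUs are dyntick-idle. */
static int cpu_is_quiet(int cpu)
{
        return cpu % 2 == 0;
}

int main(void)
{
        unsigned long qsmask = 0xfUL;   /* four CPUs still owe a quiescent state */
        unsigned long mask = 0;
        unsigned long bit = 1;
        int cpu;

        /* Collect every still-pending CPU for which the predicate holds. */
        for (cpu = 0; cpu < 4; cpu++, bit <<= 1)
                if ((qsmask & bit) != 0 && cpu_is_quiet(cpu))
                        mask |= bit;

        if (mask != 0)
                printf("would report mask %#lx against qsmask %#lx\n",
                       mask, qsmask);
        return 0;
}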
1541 | 1542 | ||
1542 | /* | 1543 | /* |
1543 | * Force quiescent states on reluctant CPUs, and also detect which | 1544 | * Force quiescent states on reluctant CPUs, and also detect which |
1544 | * CPUs are in dyntick-idle mode. | 1545 | * CPUs are in dyntick-idle mode. |
1545 | */ | 1546 | */ |
1546 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | 1547 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) |
1547 | { | 1548 | { |
1548 | unsigned long flags; | 1549 | unsigned long flags; |
1549 | struct rcu_node *rnp = rcu_get_root(rsp); | 1550 | struct rcu_node *rnp = rcu_get_root(rsp); |
1550 | 1551 | ||
1551 | trace_rcu_utilization("Start fqs"); | 1552 | trace_rcu_utilization("Start fqs"); |
1552 | if (!rcu_gp_in_progress(rsp)) { | 1553 | if (!rcu_gp_in_progress(rsp)) { |
1553 | trace_rcu_utilization("End fqs"); | 1554 | trace_rcu_utilization("End fqs"); |
1554 | return; /* No grace period in progress, nothing to force. */ | 1555 | return; /* No grace period in progress, nothing to force. */ |
1555 | } | 1556 | } |
1556 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { | 1557 | if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { |
1557 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ | 1558 | rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ |
1558 | trace_rcu_utilization("End fqs"); | 1559 | trace_rcu_utilization("End fqs"); |
1559 | return; /* Someone else is already on the job. */ | 1560 | return; /* Someone else is already on the job. */ |
1560 | } | 1561 | } |
1561 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) | 1562 | if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) |
1562 | goto unlock_fqs_ret; /* no emergency and done recently. */ | 1563 | goto unlock_fqs_ret; /* no emergency and done recently. */ |
1563 | rsp->n_force_qs++; | 1564 | rsp->n_force_qs++; |
1564 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1565 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
1565 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 1566 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
1566 | if (!rcu_gp_in_progress(rsp)) { | 1567 | if (!rcu_gp_in_progress(rsp)) { |
1567 | rsp->n_force_qs_ngp++; | 1568 | rsp->n_force_qs_ngp++; |
1568 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1569 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1569 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ | 1570 | goto unlock_fqs_ret; /* no GP in progress, time updated. */ |
1570 | } | 1571 | } |
1571 | rsp->fqs_active = 1; | 1572 | rsp->fqs_active = 1; |
1572 | switch (rsp->fqs_state) { | 1573 | switch (rsp->fqs_state) { |
1573 | case RCU_GP_IDLE: | 1574 | case RCU_GP_IDLE: |
1574 | case RCU_GP_INIT: | 1575 | case RCU_GP_INIT: |
1575 | 1576 | ||
1576 | break; /* grace period idle or initializing, ignore. */ | 1577 | break; /* grace period idle or initializing, ignore. */ |
1577 | 1578 | ||
1578 | case RCU_SAVE_DYNTICK: | 1579 | case RCU_SAVE_DYNTICK: |
1579 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) | 1580 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) |
1580 | break; /* So gcc recognizes the dead code. */ | 1581 | break; /* So gcc recognizes the dead code. */ |
1581 | 1582 | ||
1582 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1583 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1583 | 1584 | ||
1584 | /* Record dyntick-idle state. */ | 1585 | /* Record dyntick-idle state. */ |
1585 | force_qs_rnp(rsp, dyntick_save_progress_counter); | 1586 | force_qs_rnp(rsp, dyntick_save_progress_counter); |
1586 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1587 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
1587 | if (rcu_gp_in_progress(rsp)) | 1588 | if (rcu_gp_in_progress(rsp)) |
1588 | rsp->fqs_state = RCU_FORCE_QS; | 1589 | rsp->fqs_state = RCU_FORCE_QS; |
1589 | break; | 1590 | break; |
1590 | 1591 | ||
1591 | case RCU_FORCE_QS: | 1592 | case RCU_FORCE_QS: |
1592 | 1593 | ||
1593 | /* Check dyntick-idle state, send IPI to laggards. */ | 1594 | /* Check dyntick-idle state, send IPI to laggards. */ |
1594 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1595 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1595 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs); | 1596 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs); |
1596 | 1597 | ||
1597 | /* Leave state in case more forcing is required. */ | 1598 | /* Leave state in case more forcing is required. */ |
1598 | 1599 | ||
1599 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1600 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
1600 | break; | 1601 | break; |
1601 | } | 1602 | } |
1602 | rsp->fqs_active = 0; | 1603 | rsp->fqs_active = 0; |
1603 | if (rsp->fqs_need_gp) { | 1604 | if (rsp->fqs_need_gp) { |
1604 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ | 1605 | raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ |
1605 | rsp->fqs_need_gp = 0; | 1606 | rsp->fqs_need_gp = 0; |
1606 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ | 1607 | rcu_start_gp(rsp, flags); /* releases rnp->lock */ |
1607 | trace_rcu_utilization("End fqs"); | 1608 | trace_rcu_utilization("End fqs"); |
1608 | return; | 1609 | return; |
1609 | } | 1610 | } |
1610 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 1611 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
1611 | unlock_fqs_ret: | 1612 | unlock_fqs_ret: |
1612 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); | 1613 | raw_spin_unlock_irqrestore(&rsp->fqslock, flags); |
1613 | trace_rcu_utilization("End fqs"); | 1614 | trace_rcu_utilization("End fqs"); |
1614 | } | 1615 | } |
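The relaxed path above compares rsp->jiffies_force_qs against jiffies with ULONG_CMP_GE() so that the test keeps working when the jiffies counter wraps. The small demo below uses the same unsigned-subtraction trick, written out inline rather than via the kernel macro, to show why the comparison survives wraparound.

#include <stdio.h>
#include <limits.h>

/*
 * Wraparound-tolerant "a is at or after b" for jiffies-style counters,
 * in the spirit of the ULONG_CMP_GE()/ULONG_CMP_LT() helpers used above.
 */
static int time_ge(unsigned long a, unsigned long b)
{
        return ULONG_MAX / 2 >= a - b;  /* unsigned subtraction wraps modulo 2^N */
}

int main(void)
{
        unsigned long before_wrap = ULONG_MAX - 2;
        unsigned long after_wrap = 3;                   /* counter has wrapped */

        printf("%d\n", time_ge(after_wrap, before_wrap));      /* 1: later time wins */
        printf("%d\n", time_ge(before_wrap, after_wrap));      /* 0: earlier time loses */
        return 0;
}

Because the difference is taken modulo the word size, "a - b small" means a is at or after b even when a has wrapped past zero, which is exactly the property a jiffies deadline check needs.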
1615 | 1616 | ||
1616 | #else /* #ifdef CONFIG_SMP */ | 1617 | #else /* #ifdef CONFIG_SMP */ |
1617 | 1618 | ||
1618 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | 1619 | static void force_quiescent_state(struct rcu_state *rsp, int relaxed) |
1619 | { | 1620 | { |
1620 | set_need_resched(); | 1621 | set_need_resched(); |
1621 | } | 1622 | } |
1622 | 1623 | ||
1623 | #endif /* #else #ifdef CONFIG_SMP */ | 1624 | #endif /* #else #ifdef CONFIG_SMP */ |
1624 | 1625 | ||
1625 | /* | 1626 | /* |
1626 | * This does the RCU core processing work for the specified rcu_state | 1627 | * This does the RCU core processing work for the specified rcu_state |
1627 | * and rcu_data structures. This may be called only from the CPU to | 1628 | * and rcu_data structures. This may be called only from the CPU to |
1628 | * whom the rdp belongs. | 1629 | * whom the rdp belongs. |
1629 | */ | 1630 | */ |
1630 | static void | 1631 | static void |
1631 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1632 | __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1632 | { | 1633 | { |
1633 | unsigned long flags; | 1634 | unsigned long flags; |
1634 | 1635 | ||
1635 | WARN_ON_ONCE(rdp->beenonline == 0); | 1636 | WARN_ON_ONCE(rdp->beenonline == 0); |
1636 | 1637 | ||
1637 | /* | 1638 | /* |
1638 | * If an RCU GP has gone long enough, go check for dyntick | 1639 | * If an RCU GP has gone long enough, go check for dyntick |
1639 | * idle CPUs and, if needed, send resched IPIs. | 1640 | * idle CPUs and, if needed, send resched IPIs. |
1640 | */ | 1641 | */ |
1641 | if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | 1642 | if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) |
1642 | force_quiescent_state(rsp, 1); | 1643 | force_quiescent_state(rsp, 1); |
1643 | 1644 | ||
1644 | /* | 1645 | /* |
1645 | * Advance callbacks in response to end of earlier grace | 1646 | * Advance callbacks in response to end of earlier grace |
1646 | * period that some other CPU ended. | 1647 | * period that some other CPU ended. |
1647 | */ | 1648 | */ |
1648 | rcu_process_gp_end(rsp, rdp); | 1649 | rcu_process_gp_end(rsp, rdp); |
1649 | 1650 | ||
1650 | /* Update RCU state based on any recent quiescent states. */ | 1651 | /* Update RCU state based on any recent quiescent states. */ |
1651 | rcu_check_quiescent_state(rsp, rdp); | 1652 | rcu_check_quiescent_state(rsp, rdp); |
1652 | 1653 | ||
1653 | /* Does this CPU require a not-yet-started grace period? */ | 1654 | /* Does this CPU require a not-yet-started grace period? */ |
1654 | if (cpu_needs_another_gp(rsp, rdp)) { | 1655 | if (cpu_needs_another_gp(rsp, rdp)) { |
1655 | raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); | 1656 | raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); |
1656 | rcu_start_gp(rsp, flags); /* releases above lock */ | 1657 | rcu_start_gp(rsp, flags); /* releases above lock */ |
1657 | } | 1658 | } |
1658 | 1659 | ||
1659 | /* If there are callbacks ready, invoke them. */ | 1660 | /* If there are callbacks ready, invoke them. */ |
1660 | if (cpu_has_callbacks_ready_to_invoke(rdp)) | 1661 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1661 | invoke_rcu_callbacks(rsp, rdp); | 1662 | invoke_rcu_callbacks(rsp, rdp); |
1662 | } | 1663 | } |
1663 | 1664 | ||
1664 | /* | 1665 | /* |
1665 | * Do RCU core processing for the current CPU. | 1666 | * Do RCU core processing for the current CPU. |
1666 | */ | 1667 | */ |
1667 | static void rcu_process_callbacks(struct softirq_action *unused) | 1668 | static void rcu_process_callbacks(struct softirq_action *unused) |
1668 | { | 1669 | { |
1669 | trace_rcu_utilization("Start RCU core"); | 1670 | trace_rcu_utilization("Start RCU core"); |
1670 | __rcu_process_callbacks(&rcu_sched_state, | 1671 | __rcu_process_callbacks(&rcu_sched_state, |
1671 | &__get_cpu_var(rcu_sched_data)); | 1672 | &__get_cpu_var(rcu_sched_data)); |
1672 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1673 | __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1673 | rcu_preempt_process_callbacks(); | 1674 | rcu_preempt_process_callbacks(); |
1674 | trace_rcu_utilization("End RCU core"); | 1675 | trace_rcu_utilization("End RCU core"); |
1675 | } | 1676 | } |
1676 | 1677 | ||
1677 | /* | 1678 | /* |
1678 | * Schedule RCU callback invocation. If the specified type of RCU | 1679 | * Schedule RCU callback invocation. If the specified type of RCU |
1679 | * does not support RCU priority boosting, just do a direct call, | 1680 | * does not support RCU priority boosting, just do a direct call, |
1680 | * otherwise wake up the per-CPU kernel kthread. Note that because we | 1681 | * otherwise wake up the per-CPU kernel kthread. Note that because we |
1681 | * are running on the current CPU with interrupts disabled, the | 1682 | * are running on the current CPU with interrupts disabled, the |
1682 | * rcu_cpu_kthread_task cannot disappear out from under us. | 1683 | * rcu_cpu_kthread_task cannot disappear out from under us. |
1683 | */ | 1684 | */ |
1684 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | 1685 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1685 | { | 1686 | { |
1686 | if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) | 1687 | if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active))) |
1687 | return; | 1688 | return; |
1688 | if (likely(!rsp->boost)) { | 1689 | if (likely(!rsp->boost)) { |
1689 | rcu_do_batch(rsp, rdp); | 1690 | rcu_do_batch(rsp, rdp); |
1690 | return; | 1691 | return; |
1691 | } | 1692 | } |
1692 | invoke_rcu_callbacks_kthread(); | 1693 | invoke_rcu_callbacks_kthread(); |
1693 | } | 1694 | } |
1694 | 1695 | ||
1695 | static void invoke_rcu_core(void) | 1696 | static void invoke_rcu_core(void) |
1696 | { | 1697 | { |
1697 | raise_softirq(RCU_SOFTIRQ); | 1698 | raise_softirq(RCU_SOFTIRQ); |
1698 | } | 1699 | } |
1699 | 1700 | ||
1700 | static void | 1701 | static void |
1701 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | 1702 | __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), |
1702 | struct rcu_state *rsp) | 1703 | struct rcu_state *rsp) |
1703 | { | 1704 | { |
1704 | unsigned long flags; | 1705 | unsigned long flags; |
1705 | struct rcu_data *rdp; | 1706 | struct rcu_data *rdp; |
1706 | 1707 | ||
1707 | debug_rcu_head_queue(head); | 1708 | debug_rcu_head_queue(head); |
1708 | head->func = func; | 1709 | head->func = func; |
1709 | head->next = NULL; | 1710 | head->next = NULL; |
1710 | 1711 | ||
1711 | smp_mb(); /* Ensure RCU update seen before callback registry. */ | 1712 | smp_mb(); /* Ensure RCU update seen before callback registry. */ |
1712 | 1713 | ||
1713 | /* | 1714 | /* |
1714 | * Opportunistically note grace-period endings and beginnings. | 1715 | * Opportunistically note grace-period endings and beginnings. |
1715 | * Note that we might see a beginning right after we see an | 1716 | * Note that we might see a beginning right after we see an |
1716 | * end, but never vice versa, since this CPU has to pass through | 1717 | * end, but never vice versa, since this CPU has to pass through |
1717 | * a quiescent state betweentimes. | 1718 | * a quiescent state betweentimes. |
1718 | */ | 1719 | */ |
1719 | local_irq_save(flags); | 1720 | local_irq_save(flags); |
1720 | rdp = this_cpu_ptr(rsp->rda); | 1721 | rdp = this_cpu_ptr(rsp->rda); |
1721 | 1722 | ||
1722 | /* Add the callback to our list. */ | 1723 | /* Add the callback to our list. */ |
1723 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1724 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
1724 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1725 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
1725 | rdp->qlen++; | 1726 | rdp->qlen++; |
1726 | 1727 | ||
1727 | if (__is_kfree_rcu_offset((unsigned long)func)) | 1728 | if (__is_kfree_rcu_offset((unsigned long)func)) |
1728 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, | 1729 | trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, |
1729 | rdp->qlen); | 1730 | rdp->qlen); |
1730 | else | 1731 | else |
1731 | trace_rcu_callback(rsp->name, head, rdp->qlen); | 1732 | trace_rcu_callback(rsp->name, head, rdp->qlen); |
1732 | 1733 | ||
1733 | /* If interrupts were disabled, don't dive into RCU core. */ | 1734 | /* If interrupts were disabled, don't dive into RCU core. */ |
1734 | if (irqs_disabled_flags(flags)) { | 1735 | if (irqs_disabled_flags(flags)) { |
1735 | local_irq_restore(flags); | 1736 | local_irq_restore(flags); |
1736 | return; | 1737 | return; |
1737 | } | 1738 | } |
1738 | 1739 | ||
1739 | /* | 1740 | /* |
1740 | * Force the grace period if too many callbacks or too long waiting. | 1741 | * Force the grace period if too many callbacks or too long waiting. |
1741 | * Enforce hysteresis, and don't invoke force_quiescent_state() | 1742 | * Enforce hysteresis, and don't invoke force_quiescent_state() |
1742 | * if some other CPU has recently done so. Also, don't bother | 1743 | * if some other CPU has recently done so. Also, don't bother |
1743 | * invoking force_quiescent_state() if the newly enqueued callback | 1744 | * invoking force_quiescent_state() if the newly enqueued callback |
1744 | * is the only one waiting for a grace period to complete. | 1745 | * is the only one waiting for a grace period to complete. |
1745 | */ | 1746 | */ |
1746 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | 1747 | if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { |
1747 | 1748 | ||
1748 | /* Are we ignoring a completed grace period? */ | 1749 | /* Are we ignoring a completed grace period? */ |
1749 | rcu_process_gp_end(rsp, rdp); | 1750 | rcu_process_gp_end(rsp, rdp); |
1750 | check_for_new_grace_period(rsp, rdp); | 1751 | check_for_new_grace_period(rsp, rdp); |
1751 | 1752 | ||
1752 | /* Start a new grace period if one not already started. */ | 1753 | /* Start a new grace period if one not already started. */ |
1753 | if (!rcu_gp_in_progress(rsp)) { | 1754 | if (!rcu_gp_in_progress(rsp)) { |
1754 | unsigned long nestflag; | 1755 | unsigned long nestflag; |
1755 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 1756 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
1756 | 1757 | ||
1757 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | 1758 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); |
1758 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */ | 1759 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */ |
1759 | } else { | 1760 | } else { |
1760 | /* Give the grace period a kick. */ | 1761 | /* Give the grace period a kick. */ |
1761 | rdp->blimit = LONG_MAX; | 1762 | rdp->blimit = LONG_MAX; |
1762 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | 1763 | if (rsp->n_force_qs == rdp->n_force_qs_snap && |
1763 | *rdp->nxttail[RCU_DONE_TAIL] != head) | 1764 | *rdp->nxttail[RCU_DONE_TAIL] != head) |
1764 | force_quiescent_state(rsp, 0); | 1765 | force_quiescent_state(rsp, 0); |
1765 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1766 | rdp->n_force_qs_snap = rsp->n_force_qs; |
1766 | rdp->qlen_last_fqs_check = rdp->qlen; | 1767 | rdp->qlen_last_fqs_check = rdp->qlen; |
1767 | } | 1768 | } |
1768 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | 1769 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) |
1769 | force_quiescent_state(rsp, 1); | 1770 | force_quiescent_state(rsp, 1); |
1770 | local_irq_restore(flags); | 1771 | local_irq_restore(flags); |
1771 | } | 1772 | } |
1772 | 1773 | ||
1773 | /* | 1774 | /* |
1774 | * Queue an RCU-sched callback for invocation after a grace period. | 1775 | * Queue an RCU-sched callback for invocation after a grace period. |
1775 | */ | 1776 | */ |
1776 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1777 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
1777 | { | 1778 | { |
1778 | __call_rcu(head, func, &rcu_sched_state); | 1779 | __call_rcu(head, func, &rcu_sched_state); |
1779 | } | 1780 | } |
1780 | EXPORT_SYMBOL_GPL(call_rcu_sched); | 1781 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
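
For readers coming to this hunk cold, a minimal usage sketch of the call_rcu_sched() API exported above may help (editorial illustration only, not part of this commit; struct foo, foo_reclaim() and foo_remove() are hypothetical names):

	#include <linux/kernel.h>
	#include <linux/rculist.h>
	#include <linux/slab.h>

	struct foo {
		struct list_head list;
		struct rcu_head rcu;
		int data;
	};

	/* Runs after an rcu-sched grace period; no pre-existing reader can still hold fp. */
	static void foo_reclaim(struct rcu_head *rcu)
	{
		struct foo *fp = container_of(rcu, struct foo, rcu);

		kfree(fp);
	}

	/* Caller holds the update-side lock protecting the list. */
	static void foo_remove(struct foo *fp)
	{
		list_del_rcu(&fp->list);		/* Unlink from the RCU-protected list. */
		call_rcu_sched(&fp->rcu, foo_reclaim);	/* Defer the kfree() past a grace period. */
	}
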
1781 | 1782 | ||
1782 | /* | 1783 | /* |
1783 | * Queue an RCU callback for invocation after a quicker grace period. | 1784 | * Queue an RCU callback for invocation after a quicker grace period. |
1784 | */ | 1785 | */ |
1785 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 1786 | void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
1786 | { | 1787 | { |
1787 | __call_rcu(head, func, &rcu_bh_state); | 1788 | __call_rcu(head, func, &rcu_bh_state); |
1788 | } | 1789 | } |
1789 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1790 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
1790 | 1791 | ||
1791 | /** | 1792 | /** |
1792 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. | 1793 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. |
1793 | * | 1794 | * |
1794 | * Control will return to the caller some time after a full rcu-sched | 1795 | * Control will return to the caller some time after a full rcu-sched |
1795 | * grace period has elapsed, in other words after all currently executing | 1796 | * grace period has elapsed, in other words after all currently executing |
1796 | * rcu-sched read-side critical sections have completed. These read-side | 1797 | * rcu-sched read-side critical sections have completed. These read-side |
1797 | * critical sections are delimited by rcu_read_lock_sched() and | 1798 | * critical sections are delimited by rcu_read_lock_sched() and |
1798 | * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), | 1799 | * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), |
1799 | * local_irq_disable(), and so on may be used in place of | 1800 | * local_irq_disable(), and so on may be used in place of |
1800 | * rcu_read_lock_sched(). | 1801 | * rcu_read_lock_sched(). |
1801 | * | 1802 | * |
1802 | * This means that all preempt_disable code sequences, including NMI and | 1803 | * This means that all preempt_disable code sequences, including NMI and |
1803 | * hardware-interrupt handlers, in progress on entry will have completed | 1804 | * hardware-interrupt handlers, in progress on entry will have completed |
1804 | * before this primitive returns. However, this does not guarantee that | 1805 | * before this primitive returns. However, this does not guarantee that |
1805 | * softirq handlers will have completed, since in some kernels, these | 1806 | * softirq handlers will have completed, since in some kernels, these |
1806 | * handlers can run in process context, and can block. | 1807 | * handlers can run in process context, and can block. |
1807 | * | 1808 | * |
1808 | * This primitive provides the guarantees made by the (now removed) | 1809 | * This primitive provides the guarantees made by the (now removed) |
1809 | * synchronize_kernel() API. In contrast, synchronize_rcu() only | 1810 | * synchronize_kernel() API. In contrast, synchronize_rcu() only |
1810 | * guarantees that rcu_read_lock() sections will have completed. | 1811 | * guarantees that rcu_read_lock() sections will have completed. |
1811 | * In "classic RCU", these two guarantees happen to be one and | 1812 | * In "classic RCU", these two guarantees happen to be one and |
1812 | * the same, but can differ in realtime RCU implementations. | 1813 | * the same, but can differ in realtime RCU implementations. |
1813 | */ | 1814 | */ |
1814 | void synchronize_sched(void) | 1815 | void synchronize_sched(void) |
1815 | { | 1816 | { |
1816 | if (rcu_blocking_is_gp()) | 1817 | if (rcu_blocking_is_gp()) |
1817 | return; | 1818 | return; |
1818 | wait_rcu_gp(call_rcu_sched); | 1819 | wait_rcu_gp(call_rcu_sched); |
1819 | } | 1820 | } |
1820 | EXPORT_SYMBOL_GPL(synchronize_sched); | 1821 | EXPORT_SYMBOL_GPL(synchronize_sched); |
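
Similarly, a hedged sketch of the blocking update pattern that synchronize_sched() supports (editorial; gbl_foo, gbl_lock and foo_update() are illustrative names, and readers of the pointer would use rcu_read_lock_sched()/rcu_dereference_sched() or plain preempt_disable() sections):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {
		int data;
	};

	static struct foo __rcu *gbl_foo;	/* RCU-protected global pointer. */
	static DEFINE_SPINLOCK(gbl_lock);	/* Serializes updaters. */

	static void foo_update(struct foo *new_fp)
	{
		struct foo *old_fp;

		spin_lock(&gbl_lock);
		old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&gbl_lock));
		rcu_assign_pointer(gbl_foo, new_fp);	/* Publish the new version. */
		spin_unlock(&gbl_lock);

		synchronize_sched();	/* Wait out all pre-existing preempt-disabled readers. */
		kfree(old_fp);		/* Now safe: no reader can still reference old_fp. */
	}
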
1821 | 1822 | ||
1822 | /** | 1823 | /** |
1823 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | 1824 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. |
1824 | * | 1825 | * |
1825 | * Control will return to the caller some time after a full rcu_bh grace | 1826 | * Control will return to the caller some time after a full rcu_bh grace |
1826 | * period has elapsed, in other words after all currently executing rcu_bh | 1827 | * period has elapsed, in other words after all currently executing rcu_bh |
1827 | * read-side critical sections have completed. RCU read-side critical | 1828 | * read-side critical sections have completed. RCU read-side critical |
1828 | * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), | 1829 | * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), |
1829 | * and may be nested. | 1830 | * and may be nested. |
1830 | */ | 1831 | */ |
1831 | void synchronize_rcu_bh(void) | 1832 | void synchronize_rcu_bh(void) |
1832 | { | 1833 | { |
1833 | if (rcu_blocking_is_gp()) | 1834 | if (rcu_blocking_is_gp()) |
1834 | return; | 1835 | return; |
1835 | wait_rcu_gp(call_rcu_bh); | 1836 | wait_rcu_gp(call_rcu_bh); |
1836 | } | 1837 | } |
1837 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1838 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
1838 | 1839 | ||
1839 | /* | 1840 | /* |
1840 | * Check to see if there is any immediate RCU-related work to be done | 1841 | * Check to see if there is any immediate RCU-related work to be done |
1841 | * by the current CPU, for the specified type of RCU, returning 1 if so. | 1842 | * by the current CPU, for the specified type of RCU, returning 1 if so. |
1842 | * The checks are in order of increasing expense: checks that can be | 1843 | * The checks are in order of increasing expense: checks that can be |
1843 | * carried out against CPU-local state are performed first. However, | 1844 | * carried out against CPU-local state are performed first. However, |
1844 | * we must check for CPU stalls first, else we might not get a chance. | 1845 | * we must check for CPU stalls first, else we might not get a chance. |
1845 | */ | 1846 | */ |
1846 | static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | 1847 | static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) |
1847 | { | 1848 | { |
1848 | struct rcu_node *rnp = rdp->mynode; | 1849 | struct rcu_node *rnp = rdp->mynode; |
1849 | 1850 | ||
1850 | rdp->n_rcu_pending++; | 1851 | rdp->n_rcu_pending++; |
1851 | 1852 | ||
1852 | /* Check for CPU stalls, if enabled. */ | 1853 | /* Check for CPU stalls, if enabled. */ |
1853 | check_cpu_stall(rsp, rdp); | 1854 | check_cpu_stall(rsp, rdp); |
1854 | 1855 | ||
1855 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 1856 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
1856 | if (rcu_scheduler_fully_active && | 1857 | if (rcu_scheduler_fully_active && |
1857 | rdp->qs_pending && !rdp->passed_quiesce) { | 1858 | rdp->qs_pending && !rdp->passed_quiesce) { |
1858 | 1859 | ||
1859 | /* | 1860 | /* |
1860 | * If force_quiescent_state() is coming soon and this CPU | 1861 | * If force_quiescent_state() is coming soon and this CPU |
1861 | * needs a quiescent state, and this is either RCU-sched | 1862 | * needs a quiescent state, and this is either RCU-sched |
1862 | * or RCU-bh, force a local reschedule. | 1863 | * or RCU-bh, force a local reschedule. |
1863 | */ | 1864 | */ |
1864 | rdp->n_rp_qs_pending++; | 1865 | rdp->n_rp_qs_pending++; |
1865 | if (!rdp->preemptible && | 1866 | if (!rdp->preemptible && |
1866 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | 1867 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, |
1867 | jiffies)) | 1868 | jiffies)) |
1868 | set_need_resched(); | 1869 | set_need_resched(); |
1869 | } else if (rdp->qs_pending && rdp->passed_quiesce) { | 1870 | } else if (rdp->qs_pending && rdp->passed_quiesce) { |
1870 | rdp->n_rp_report_qs++; | 1871 | rdp->n_rp_report_qs++; |
1871 | return 1; | 1872 | return 1; |
1872 | } | 1873 | } |
1873 | 1874 | ||
1874 | /* Does this CPU have callbacks ready to invoke? */ | 1875 | /* Does this CPU have callbacks ready to invoke? */ |
1875 | if (cpu_has_callbacks_ready_to_invoke(rdp)) { | 1876 | if (cpu_has_callbacks_ready_to_invoke(rdp)) { |
1876 | rdp->n_rp_cb_ready++; | 1877 | rdp->n_rp_cb_ready++; |
1877 | return 1; | 1878 | return 1; |
1878 | } | 1879 | } |
1879 | 1880 | ||
1880 | /* Has RCU gone idle with this CPU needing another grace period? */ | 1881 | /* Has RCU gone idle with this CPU needing another grace period? */ |
1881 | if (cpu_needs_another_gp(rsp, rdp)) { | 1882 | if (cpu_needs_another_gp(rsp, rdp)) { |
1882 | rdp->n_rp_cpu_needs_gp++; | 1883 | rdp->n_rp_cpu_needs_gp++; |
1883 | return 1; | 1884 | return 1; |
1884 | } | 1885 | } |
1885 | 1886 | ||
1886 | /* Has another RCU grace period completed? */ | 1887 | /* Has another RCU grace period completed? */ |
1887 | if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ | 1888 | if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ |
1888 | rdp->n_rp_gp_completed++; | 1889 | rdp->n_rp_gp_completed++; |
1889 | return 1; | 1890 | return 1; |
1890 | } | 1891 | } |
1891 | 1892 | ||
1892 | /* Has a new RCU grace period started? */ | 1893 | /* Has a new RCU grace period started? */ |
1893 | if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ | 1894 | if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ |
1894 | rdp->n_rp_gp_started++; | 1895 | rdp->n_rp_gp_started++; |
1895 | return 1; | 1896 | return 1; |
1896 | } | 1897 | } |
1897 | 1898 | ||
1898 | /* Has an RCU GP gone long enough to send resched IPIs &c? */ | 1899 | /* Has an RCU GP gone long enough to send resched IPIs &c? */ |
1899 | if (rcu_gp_in_progress(rsp) && | 1900 | if (rcu_gp_in_progress(rsp) && |
1900 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { | 1901 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) { |
1901 | rdp->n_rp_need_fqs++; | 1902 | rdp->n_rp_need_fqs++; |
1902 | return 1; | 1903 | return 1; |
1903 | } | 1904 | } |
1904 | 1905 | ||
1905 | /* nothing to do */ | 1906 | /* nothing to do */ |
1906 | rdp->n_rp_need_nothing++; | 1907 | rdp->n_rp_need_nothing++; |
1907 | return 0; | 1908 | return 0; |
1908 | } | 1909 | } |
1909 | 1910 | ||
1910 | /* | 1911 | /* |
1911 | * Check to see if there is any immediate RCU-related work to be done | 1912 | * Check to see if there is any immediate RCU-related work to be done |
1912 | * by the current CPU, returning 1 if so. This function is part of the | 1913 | * by the current CPU, returning 1 if so. This function is part of the |
1913 | * RCU implementation; it is -not- an exported member of the RCU API. | 1914 | * RCU implementation; it is -not- an exported member of the RCU API. |
1914 | */ | 1915 | */ |
1915 | static int rcu_pending(int cpu) | 1916 | static int rcu_pending(int cpu) |
1916 | { | 1917 | { |
1917 | return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || | 1918 | return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || |
1918 | __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || | 1919 | __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || |
1919 | rcu_preempt_pending(cpu); | 1920 | rcu_preempt_pending(cpu); |
1920 | } | 1921 | } |
1921 | 1922 | ||
1922 | /* | 1923 | /* |
1923 | * Check to see if any future RCU-related work will need to be done | 1924 | * Check to see if any future RCU-related work will need to be done |
1924 | * by the current CPU, even if none need be done immediately, returning | 1925 | * by the current CPU, even if none need be done immediately, returning |
1925 | * 1 if so. | 1926 | * 1 if so. |
1926 | */ | 1927 | */ |
1927 | static int rcu_cpu_has_callbacks(int cpu) | 1928 | static int rcu_cpu_has_callbacks(int cpu) |
1928 | { | 1929 | { |
1929 | /* RCU callbacks either ready or pending? */ | 1930 | /* RCU callbacks either ready or pending? */ |
1930 | return per_cpu(rcu_sched_data, cpu).nxtlist || | 1931 | return per_cpu(rcu_sched_data, cpu).nxtlist || |
1931 | per_cpu(rcu_bh_data, cpu).nxtlist || | 1932 | per_cpu(rcu_bh_data, cpu).nxtlist || |
1932 | rcu_preempt_needs_cpu(cpu); | 1933 | rcu_preempt_needs_cpu(cpu); |
1933 | } | 1934 | } |
1934 | 1935 | ||
1935 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; | 1936 | static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; |
1936 | static atomic_t rcu_barrier_cpu_count; | 1937 | static atomic_t rcu_barrier_cpu_count; |
1937 | static DEFINE_MUTEX(rcu_barrier_mutex); | 1938 | static DEFINE_MUTEX(rcu_barrier_mutex); |
1938 | static struct completion rcu_barrier_completion; | 1939 | static struct completion rcu_barrier_completion; |
1939 | 1940 | ||
1940 | static void rcu_barrier_callback(struct rcu_head *notused) | 1941 | static void rcu_barrier_callback(struct rcu_head *notused) |
1941 | { | 1942 | { |
1942 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 1943 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
1943 | complete(&rcu_barrier_completion); | 1944 | complete(&rcu_barrier_completion); |
1944 | } | 1945 | } |
1945 | 1946 | ||
1946 | /* | 1947 | /* |
1947 | * Called with preemption disabled, and from cross-cpu IRQ context. | 1948 | * Called with preemption disabled, and from cross-cpu IRQ context. |
1948 | */ | 1949 | */ |
1949 | static void rcu_barrier_func(void *type) | 1950 | static void rcu_barrier_func(void *type) |
1950 | { | 1951 | { |
1951 | int cpu = smp_processor_id(); | 1952 | int cpu = smp_processor_id(); |
1952 | struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); | 1953 | struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); |
1953 | void (*call_rcu_func)(struct rcu_head *head, | 1954 | void (*call_rcu_func)(struct rcu_head *head, |
1954 | void (*func)(struct rcu_head *head)); | 1955 | void (*func)(struct rcu_head *head)); |
1955 | 1956 | ||
1956 | atomic_inc(&rcu_barrier_cpu_count); | 1957 | atomic_inc(&rcu_barrier_cpu_count); |
1957 | call_rcu_func = type; | 1958 | call_rcu_func = type; |
1958 | call_rcu_func(head, rcu_barrier_callback); | 1959 | call_rcu_func(head, rcu_barrier_callback); |
1959 | } | 1960 | } |
1960 | 1961 | ||
1961 | /* | 1962 | /* |
1962 | * Orchestrate the specified type of RCU barrier, waiting for all | 1963 | * Orchestrate the specified type of RCU barrier, waiting for all |
1963 | * RCU callbacks of the specified type to complete. | 1964 | * RCU callbacks of the specified type to complete. |
1964 | */ | 1965 | */ |
1965 | static void _rcu_barrier(struct rcu_state *rsp, | 1966 | static void _rcu_barrier(struct rcu_state *rsp, |
1966 | void (*call_rcu_func)(struct rcu_head *head, | 1967 | void (*call_rcu_func)(struct rcu_head *head, |
1967 | void (*func)(struct rcu_head *head))) | 1968 | void (*func)(struct rcu_head *head))) |
1968 | { | 1969 | { |
1969 | BUG_ON(in_interrupt()); | 1970 | BUG_ON(in_interrupt()); |
1970 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ | 1971 | /* Take mutex to serialize concurrent rcu_barrier() requests. */ |
1971 | mutex_lock(&rcu_barrier_mutex); | 1972 | mutex_lock(&rcu_barrier_mutex); |
1972 | init_completion(&rcu_barrier_completion); | 1973 | init_completion(&rcu_barrier_completion); |
1973 | /* | 1974 | /* |
1974 | * Initialize rcu_barrier_cpu_count to 1, then invoke | 1975 | * Initialize rcu_barrier_cpu_count to 1, then invoke |
1975 | * rcu_barrier_func() on each CPU, so that each CPU also has | 1976 | * rcu_barrier_func() on each CPU, so that each CPU also has |
1976 | * incremented rcu_barrier_cpu_count. Only then is it safe to | 1977 | * incremented rcu_barrier_cpu_count. Only then is it safe to |
1977 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU | 1978 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU |
1978 | * might complete its grace period before all of the other CPUs | 1979 | * might complete its grace period before all of the other CPUs |
1979 | * did their increment, causing this function to return too | 1980 | * did their increment, causing this function to return too |
1980 | * early. Note that on_each_cpu() disables irqs, which prevents | 1981 | * early. Note that on_each_cpu() disables irqs, which prevents |
1981 | * any CPUs from coming online or going offline until each online | 1982 | * any CPUs from coming online or going offline until each online |
1982 | * CPU has queued its RCU-barrier callback. | 1983 | * CPU has queued its RCU-barrier callback. |
1983 | */ | 1984 | */ |
1984 | atomic_set(&rcu_barrier_cpu_count, 1); | 1985 | atomic_set(&rcu_barrier_cpu_count, 1); |
1985 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 1986 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); |
1986 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 1987 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
1987 | complete(&rcu_barrier_completion); | 1988 | complete(&rcu_barrier_completion); |
1988 | wait_for_completion(&rcu_barrier_completion); | 1989 | wait_for_completion(&rcu_barrier_completion); |
1989 | mutex_unlock(&rcu_barrier_mutex); | 1990 | mutex_unlock(&rcu_barrier_mutex); |
1990 | } | 1991 | } |
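
The block comment inside _rcu_barrier() above carries the key correctness argument; a distilled, self-contained form of the same idiom, with hypothetical names (editorial, not part of this commit), shows why the count starts at 1: the caller's initial reference keeps the completion from firing until every worker has taken its own reference.

	#include <linux/atomic.h>
	#include <linux/completion.h>

	static atomic_t nr_pending;
	static struct completion all_done;

	/* Each worker calls this exactly once when it finishes. */
	static void worker_finished(void)
	{
		if (atomic_dec_and_test(&nr_pending))
			complete(&all_done);
	}

	/* launch(i) starts worker i, which eventually calls worker_finished(). */
	static void wait_for_workers(int nr_workers, void (*launch)(int))
	{
		int i;

		init_completion(&all_done);
		atomic_set(&nr_pending, 1);		/* Caller's own reference. */
		for (i = 0; i < nr_workers; i++) {
			atomic_inc(&nr_pending);	/* One reference per worker... */
			launch(i);			/* ...taken before that worker can finish. */
		}
		if (atomic_dec_and_test(&nr_pending))	/* Drop the caller's reference last. */
			complete(&all_done);		/* Reached only if all workers already finished. */
		wait_for_completion(&all_done);
	}
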
1991 | 1992 | ||
1992 | /** | 1993 | /** |
1993 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | 1994 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. |
1994 | */ | 1995 | */ |
1995 | void rcu_barrier_bh(void) | 1996 | void rcu_barrier_bh(void) |
1996 | { | 1997 | { |
1997 | _rcu_barrier(&rcu_bh_state, call_rcu_bh); | 1998 | _rcu_barrier(&rcu_bh_state, call_rcu_bh); |
1998 | } | 1999 | } |
1999 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | 2000 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); |
2000 | 2001 | ||
2001 | /** | 2002 | /** |
2002 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | 2003 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. |
2003 | */ | 2004 | */ |
2004 | void rcu_barrier_sched(void) | 2005 | void rcu_barrier_sched(void) |
2005 | { | 2006 | { |
2006 | _rcu_barrier(&rcu_sched_state, call_rcu_sched); | 2007 | _rcu_barrier(&rcu_sched_state, call_rcu_sched); |
2007 | } | 2008 | } |
2008 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 2009 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
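
A common reason to call rcu_barrier_sched() or rcu_barrier_bh() is module unload: the module must wait for its already-queued callbacks to run before its code and data go away. A hedged sketch (editorial; example_cache and the init/exit functions are hypothetical):

	#include <linux/module.h>
	#include <linux/slab.h>

	static struct kmem_cache *example_cache;	/* Hypothetical cache of RCU-freed objects. */

	static int __init example_init(void)
	{
		example_cache = kmem_cache_create("example", 64, 0, 0, NULL);
		return example_cache ? 0 : -ENOMEM;
	}
	module_init(example_init);

	static void __exit example_exit(void)
	{
		/* Driver-specific teardown that stops queueing new callbacks goes here. */
		rcu_barrier_sched();			/* Wait for callbacks already queued on all CPUs. */
		kmem_cache_destroy(example_cache);	/* Safe: no callback can still touch this cache. */
	}
	module_exit(example_exit);
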
2009 | 2010 | ||
2010 | /* | 2011 | /* |
2011 | * Do boot-time initialization of a CPU's per-CPU RCU data. | 2012 | * Do boot-time initialization of a CPU's per-CPU RCU data. |
2012 | */ | 2013 | */ |
2013 | static void __init | 2014 | static void __init |
2014 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | 2015 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) |
2015 | { | 2016 | { |
2016 | unsigned long flags; | 2017 | unsigned long flags; |
2017 | int i; | 2018 | int i; |
2018 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2019 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
2019 | struct rcu_node *rnp = rcu_get_root(rsp); | 2020 | struct rcu_node *rnp = rcu_get_root(rsp); |
2020 | 2021 | ||
2021 | /* Set up local state, ensuring consistent view of global state. */ | 2022 | /* Set up local state, ensuring consistent view of global state. */ |
2022 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2023 | raw_spin_lock_irqsave(&rnp->lock, flags); |
2023 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); | 2024 | rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); |
2024 | rdp->nxtlist = NULL; | 2025 | rdp->nxtlist = NULL; |
2025 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 2026 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
2026 | rdp->nxttail[i] = &rdp->nxtlist; | 2027 | rdp->nxttail[i] = &rdp->nxtlist; |
2027 | rdp->qlen = 0; | 2028 | rdp->qlen = 0; |
2028 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); | 2029 | rdp->dynticks = &per_cpu(rcu_dynticks, cpu); |
2029 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); | 2030 | WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); |
2030 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); | 2031 | WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); |
2031 | rdp->cpu = cpu; | 2032 | rdp->cpu = cpu; |
2032 | rdp->rsp = rsp; | 2033 | rdp->rsp = rsp; |
2033 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 2034 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
2034 | } | 2035 | } |
2035 | 2036 | ||
2036 | /* | 2037 | /* |
2037 | * Initialize a CPU's per-CPU RCU data. Note that only one online or | 2038 | * Initialize a CPU's per-CPU RCU data. Note that only one online or |
2038 | * offline event can be happening at a given time. Note also that we | 2039 | * offline event can be happening at a given time. Note also that we |
2039 | * can accept some slop in the rsp->completed access due to the fact | 2040 | * can accept some slop in the rsp->completed access due to the fact |
2040 | * that this CPU cannot possibly have any RCU callbacks in flight yet. | 2041 | * that this CPU cannot possibly have any RCU callbacks in flight yet. |
2041 | */ | 2042 | */ |
2042 | static void __cpuinit | 2043 | static void __cpuinit |
2043 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | 2044 | rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) |
2044 | { | 2045 | { |
2045 | unsigned long flags; | 2046 | unsigned long flags; |
2046 | unsigned long mask; | 2047 | unsigned long mask; |
2047 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2048 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
2048 | struct rcu_node *rnp = rcu_get_root(rsp); | 2049 | struct rcu_node *rnp = rcu_get_root(rsp); |
2049 | 2050 | ||
2050 | /* Set up local state, ensuring consistent view of global state. */ | 2051 | /* Set up local state, ensuring consistent view of global state. */ |
2051 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2052 | raw_spin_lock_irqsave(&rnp->lock, flags); |
2052 | rdp->beenonline = 1; /* We have now been online. */ | 2053 | rdp->beenonline = 1; /* We have now been online. */ |
2053 | rdp->preemptible = preemptible; | 2054 | rdp->preemptible = preemptible; |
2054 | rdp->qlen_last_fqs_check = 0; | 2055 | rdp->qlen_last_fqs_check = 0; |
2055 | rdp->n_force_qs_snap = rsp->n_force_qs; | 2056 | rdp->n_force_qs_snap = rsp->n_force_qs; |
2056 | rdp->blimit = blimit; | 2057 | rdp->blimit = blimit; |
2057 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; | 2058 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; |
2058 | atomic_set(&rdp->dynticks->dynticks, | 2059 | atomic_set(&rdp->dynticks->dynticks, |
2059 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 2060 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
2061 | rcu_prepare_for_idle_init(cpu); | ||
2060 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2062 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
2061 | 2063 | ||
2062 | /* | 2064 | /* |
2063 | * A new grace period might start here. If so, we won't be part | 2065 | * A new grace period might start here. If so, we won't be part |
2064 | * of it, but that is OK, as we are currently in a quiescent state. | 2066 | * of it, but that is OK, as we are currently in a quiescent state. |
2065 | */ | 2067 | */ |
2066 | 2068 | ||
2067 | /* Exclude any attempts to start a new GP on large systems. */ | 2069 | /* Exclude any attempts to start a new GP on large systems. */ |
2068 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 2070 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ |
2069 | 2071 | ||
2070 | /* Add CPU to rcu_node bitmasks. */ | 2072 | /* Add CPU to rcu_node bitmasks. */ |
2071 | rnp = rdp->mynode; | 2073 | rnp = rdp->mynode; |
2072 | mask = rdp->grpmask; | 2074 | mask = rdp->grpmask; |
2073 | do { | 2075 | do { |
2074 | /* Exclude any attempts to start a new GP on small systems. */ | 2076 | /* Exclude any attempts to start a new GP on small systems. */ |
2075 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 2077 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
2076 | rnp->qsmaskinit |= mask; | 2078 | rnp->qsmaskinit |= mask; |
2077 | mask = rnp->grpmask; | 2079 | mask = rnp->grpmask; |
2078 | if (rnp == rdp->mynode) { | 2080 | if (rnp == rdp->mynode) { |
2079 | /* | 2081 | /* |
2080 | * If there is a grace period in progress, we will | 2082 | * If there is a grace period in progress, we will |
2081 | * set up to wait for it next time we run the | 2083 | * set up to wait for it next time we run the |
2082 | * RCU core code. | 2084 | * RCU core code. |
2083 | */ | 2085 | */ |
2084 | rdp->gpnum = rnp->completed; | 2086 | rdp->gpnum = rnp->completed; |
2085 | rdp->completed = rnp->completed; | 2087 | rdp->completed = rnp->completed; |
2086 | rdp->passed_quiesce = 0; | 2088 | rdp->passed_quiesce = 0; |
2087 | rdp->qs_pending = 0; | 2089 | rdp->qs_pending = 0; |
2088 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; | 2090 | rdp->passed_quiesce_gpnum = rnp->gpnum - 1; |
2089 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); | 2091 | trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); |
2090 | } | 2092 | } |
2091 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 2093 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
2092 | rnp = rnp->parent; | 2094 | rnp = rnp->parent; |
2093 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); | 2095 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); |
2094 | 2096 | ||
2095 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2097 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
2096 | } | 2098 | } |
2097 | 2099 | ||
2098 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2100 | static void __cpuinit rcu_prepare_cpu(int cpu) |
2099 | { | 2101 | { |
2100 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); | 2102 | rcu_init_percpu_data(cpu, &rcu_sched_state, 0); |
2101 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); | 2103 | rcu_init_percpu_data(cpu, &rcu_bh_state, 0); |
2102 | rcu_preempt_init_percpu_data(cpu); | 2104 | rcu_preempt_init_percpu_data(cpu); |
2103 | } | 2105 | } |
2104 | 2106 | ||
2105 | /* | 2107 | /* |
2106 | * Handle CPU online/offline notification events. | 2108 | * Handle CPU online/offline notification events. |
2107 | */ | 2109 | */ |
2108 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | 2110 | static int __cpuinit rcu_cpu_notify(struct notifier_block *self, |
2109 | unsigned long action, void *hcpu) | 2111 | unsigned long action, void *hcpu) |
2110 | { | 2112 | { |
2111 | long cpu = (long)hcpu; | 2113 | long cpu = (long)hcpu; |
2112 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 2114 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
2113 | struct rcu_node *rnp = rdp->mynode; | 2115 | struct rcu_node *rnp = rdp->mynode; |
2114 | 2116 | ||
2115 | trace_rcu_utilization("Start CPU hotplug"); | 2117 | trace_rcu_utilization("Start CPU hotplug"); |
2116 | switch (action) { | 2118 | switch (action) { |
2117 | case CPU_UP_PREPARE: | 2119 | case CPU_UP_PREPARE: |
2118 | case CPU_UP_PREPARE_FROZEN: | 2120 | case CPU_UP_PREPARE_FROZEN: |
2119 | rcu_prepare_cpu(cpu); | 2121 | rcu_prepare_cpu(cpu); |
2120 | rcu_prepare_kthreads(cpu); | 2122 | rcu_prepare_kthreads(cpu); |
2121 | break; | 2123 | break; |
2122 | case CPU_ONLINE: | 2124 | case CPU_ONLINE: |
2123 | case CPU_DOWN_FAILED: | 2125 | case CPU_DOWN_FAILED: |
2124 | rcu_node_kthread_setaffinity(rnp, -1); | 2126 | rcu_node_kthread_setaffinity(rnp, -1); |
2125 | rcu_cpu_kthread_setrt(cpu, 1); | 2127 | rcu_cpu_kthread_setrt(cpu, 1); |
2126 | break; | 2128 | break; |
2127 | case CPU_DOWN_PREPARE: | 2129 | case CPU_DOWN_PREPARE: |
2128 | rcu_node_kthread_setaffinity(rnp, cpu); | 2130 | rcu_node_kthread_setaffinity(rnp, cpu); |
2129 | rcu_cpu_kthread_setrt(cpu, 0); | 2131 | rcu_cpu_kthread_setrt(cpu, 0); |
2130 | break; | 2132 | break; |
2131 | case CPU_DYING: | 2133 | case CPU_DYING: |
2132 | case CPU_DYING_FROZEN: | 2134 | case CPU_DYING_FROZEN: |
2133 | /* | 2135 | /* |
2134 | * The whole machine is "stopped" except this CPU, so we can | 2136 | * The whole machine is "stopped" except this CPU, so we can |
2135 | * touch any data without introducing corruption. We send the | 2137 | * touch any data without introducing corruption. We send the |
2136 | * dying CPU's callbacks to an arbitrarily chosen online CPU. | 2138 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
2137 | */ | 2139 | */ |
2138 | rcu_send_cbs_to_online(&rcu_bh_state); | 2140 | rcu_send_cbs_to_online(&rcu_bh_state); |
2139 | rcu_send_cbs_to_online(&rcu_sched_state); | 2141 | rcu_send_cbs_to_online(&rcu_sched_state); |
2140 | rcu_preempt_send_cbs_to_online(); | 2142 | rcu_preempt_send_cbs_to_online(); |
2143 | rcu_cleanup_after_idle(cpu); | ||
2141 | break; | 2144 | break; |
2142 | case CPU_DEAD: | 2145 | case CPU_DEAD: |
2143 | case CPU_DEAD_FROZEN: | 2146 | case CPU_DEAD_FROZEN: |
2144 | case CPU_UP_CANCELED: | 2147 | case CPU_UP_CANCELED: |
2145 | case CPU_UP_CANCELED_FROZEN: | 2148 | case CPU_UP_CANCELED_FROZEN: |
2146 | rcu_offline_cpu(cpu); | 2149 | rcu_offline_cpu(cpu); |
2147 | break; | 2150 | break; |
2148 | default: | 2151 | default: |
2149 | break; | 2152 | break; |
2150 | } | 2153 | } |
2151 | trace_rcu_utilization("End CPU hotplug"); | 2154 | trace_rcu_utilization("End CPU hotplug"); |
2152 | return NOTIFY_OK; | 2155 | return NOTIFY_OK; |
2153 | } | 2156 | } |
2154 | 2157 | ||
2155 | /* | 2158 | /* |
2156 | * This function is invoked towards the end of the scheduler's initialization | 2159 | * This function is invoked towards the end of the scheduler's initialization |
2157 | * process. Before this is called, the idle task might contain | 2160 | * process. Before this is called, the idle task might contain |
2158 | * RCU read-side critical sections (during which time, this idle | 2161 | * RCU read-side critical sections (during which time, this idle |
2159 | * task is booting the system). After this function is called, the | 2162 | * task is booting the system). After this function is called, the |
2160 | * idle tasks are prohibited from containing RCU read-side critical | 2163 | * idle tasks are prohibited from containing RCU read-side critical |
2161 | * sections. This function also enables RCU lockdep checking. | 2164 | * sections. This function also enables RCU lockdep checking. |
2162 | */ | 2165 | */ |
2163 | void rcu_scheduler_starting(void) | 2166 | void rcu_scheduler_starting(void) |
2164 | { | 2167 | { |
2165 | WARN_ON(num_online_cpus() != 1); | 2168 | WARN_ON(num_online_cpus() != 1); |
2166 | WARN_ON(nr_context_switches() > 0); | 2169 | WARN_ON(nr_context_switches() > 0); |
2167 | rcu_scheduler_active = 1; | 2170 | rcu_scheduler_active = 1; |
2168 | } | 2171 | } |
2169 | 2172 | ||
2170 | /* | 2173 | /* |
2171 | * Compute the per-level fanout, either using the exact fanout specified | 2174 | * Compute the per-level fanout, either using the exact fanout specified |
2172 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. | 2175 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. |
2173 | */ | 2176 | */ |
2174 | #ifdef CONFIG_RCU_FANOUT_EXACT | 2177 | #ifdef CONFIG_RCU_FANOUT_EXACT |
2175 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2178 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
2176 | { | 2179 | { |
2177 | int i; | 2180 | int i; |
2178 | 2181 | ||
2179 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2182 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
2180 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2183 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
2181 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | 2184 | rsp->levelspread[0] = RCU_FANOUT_LEAF; |
2182 | } | 2185 | } |
2183 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2186 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
2184 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2187 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
2185 | { | 2188 | { |
2186 | int ccur; | 2189 | int ccur; |
2187 | int cprv; | 2190 | int cprv; |
2188 | int i; | 2191 | int i; |
2189 | 2192 | ||
2190 | cprv = NR_CPUS; | 2193 | cprv = NR_CPUS; |
2191 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2194 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { |
2192 | ccur = rsp->levelcnt[i]; | 2195 | ccur = rsp->levelcnt[i]; |
2193 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | 2196 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
2194 | cprv = ccur; | 2197 | cprv = ccur; |
2195 | } | 2198 | } |
2196 | } | 2199 | } |
2197 | #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2200 | #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ |
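
To make the balanced computation concrete, a worked example (editorial, assuming rsp->levelcnt[] was filled from the NUM_RCU_LVL_* macros defined in kernel/rcutree.h below, with NR_CPUS = 64 and CONFIG_RCU_FANOUT = 16):

	cprv = NR_CPUS = 64;  levelcnt[] = { 1, 4 };  NUM_RCU_LVLS = 2
	i = 1: ccur = 4, levelspread[1] = (64 + 4 - 1) / 4 = 16
	i = 0: ccur = 1, levelspread[0] = ( 4 + 1 - 1) / 1 =  4

So the root rcu_node fans out to 4 leaf rcu_node structures, each covering 16 CPUs, whereas the CONFIG_RCU_FANOUT_EXACT variant above would leave both entries at 16.
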
2198 | 2201 | ||
2199 | /* | 2202 | /* |
2200 | * Helper function for rcu_init() that initializes one rcu_state structure. | 2203 | * Helper function for rcu_init() that initializes one rcu_state structure. |
2201 | */ | 2204 | */ |
2202 | static void __init rcu_init_one(struct rcu_state *rsp, | 2205 | static void __init rcu_init_one(struct rcu_state *rsp, |
2203 | struct rcu_data __percpu *rda) | 2206 | struct rcu_data __percpu *rda) |
2204 | { | 2207 | { |
2205 | static char *buf[] = { "rcu_node_level_0", | 2208 | static char *buf[] = { "rcu_node_level_0", |
2206 | "rcu_node_level_1", | 2209 | "rcu_node_level_1", |
2207 | "rcu_node_level_2", | 2210 | "rcu_node_level_2", |
2208 | "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ | 2211 | "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ |
2209 | int cpustride = 1; | 2212 | int cpustride = 1; |
2210 | int i; | 2213 | int i; |
2211 | int j; | 2214 | int j; |
2212 | struct rcu_node *rnp; | 2215 | struct rcu_node *rnp; |
2213 | 2216 | ||
2214 | BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ | 2217 | BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ |
2215 | 2218 | ||
2216 | /* Initialize the level-tracking arrays. */ | 2219 | /* Initialize the level-tracking arrays. */ |
2217 | 2220 | ||
2218 | for (i = 1; i < NUM_RCU_LVLS; i++) | 2221 | for (i = 1; i < NUM_RCU_LVLS; i++) |
2219 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; | 2222 | rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; |
2220 | rcu_init_levelspread(rsp); | 2223 | rcu_init_levelspread(rsp); |
2221 | 2224 | ||
2222 | /* Initialize the elements themselves, starting from the leaves. */ | 2225 | /* Initialize the elements themselves, starting from the leaves. */ |
2223 | 2226 | ||
2224 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { | 2227 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { |
2225 | cpustride *= rsp->levelspread[i]; | 2228 | cpustride *= rsp->levelspread[i]; |
2226 | rnp = rsp->level[i]; | 2229 | rnp = rsp->level[i]; |
2227 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { | 2230 | for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { |
2228 | raw_spin_lock_init(&rnp->lock); | 2231 | raw_spin_lock_init(&rnp->lock); |
2229 | lockdep_set_class_and_name(&rnp->lock, | 2232 | lockdep_set_class_and_name(&rnp->lock, |
2230 | &rcu_node_class[i], buf[i]); | 2233 | &rcu_node_class[i], buf[i]); |
2231 | rnp->gpnum = 0; | 2234 | rnp->gpnum = 0; |
2232 | rnp->qsmask = 0; | 2235 | rnp->qsmask = 0; |
2233 | rnp->qsmaskinit = 0; | 2236 | rnp->qsmaskinit = 0; |
2234 | rnp->grplo = j * cpustride; | 2237 | rnp->grplo = j * cpustride; |
2235 | rnp->grphi = (j + 1) * cpustride - 1; | 2238 | rnp->grphi = (j + 1) * cpustride - 1; |
2236 | if (rnp->grphi >= NR_CPUS) | 2239 | if (rnp->grphi >= NR_CPUS) |
2237 | rnp->grphi = NR_CPUS - 1; | 2240 | rnp->grphi = NR_CPUS - 1; |
2238 | if (i == 0) { | 2241 | if (i == 0) { |
2239 | rnp->grpnum = 0; | 2242 | rnp->grpnum = 0; |
2240 | rnp->grpmask = 0; | 2243 | rnp->grpmask = 0; |
2241 | rnp->parent = NULL; | 2244 | rnp->parent = NULL; |
2242 | } else { | 2245 | } else { |
2243 | rnp->grpnum = j % rsp->levelspread[i - 1]; | 2246 | rnp->grpnum = j % rsp->levelspread[i - 1]; |
2244 | rnp->grpmask = 1UL << rnp->grpnum; | 2247 | rnp->grpmask = 1UL << rnp->grpnum; |
2245 | rnp->parent = rsp->level[i - 1] + | 2248 | rnp->parent = rsp->level[i - 1] + |
2246 | j / rsp->levelspread[i - 1]; | 2249 | j / rsp->levelspread[i - 1]; |
2247 | } | 2250 | } |
2248 | rnp->level = i; | 2251 | rnp->level = i; |
2249 | INIT_LIST_HEAD(&rnp->blkd_tasks); | 2252 | INIT_LIST_HEAD(&rnp->blkd_tasks); |
2250 | } | 2253 | } |
2251 | } | 2254 | } |
2252 | 2255 | ||
2253 | rsp->rda = rda; | 2256 | rsp->rda = rda; |
2254 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | 2257 | rnp = rsp->level[NUM_RCU_LVLS - 1]; |
2255 | for_each_possible_cpu(i) { | 2258 | for_each_possible_cpu(i) { |
2256 | while (i > rnp->grphi) | 2259 | while (i > rnp->grphi) |
2257 | rnp++; | 2260 | rnp++; |
2258 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; | 2261 | per_cpu_ptr(rsp->rda, i)->mynode = rnp; |
2259 | rcu_boot_init_percpu_data(i, rsp); | 2262 | rcu_boot_init_percpu_data(i, rsp); |
2260 | } | 2263 | } |
2261 | } | 2264 | } |
2262 | 2265 | ||
2263 | void __init rcu_init(void) | 2266 | void __init rcu_init(void) |
2264 | { | 2267 | { |
2265 | int cpu; | 2268 | int cpu; |
2266 | 2269 | ||
2267 | rcu_bootup_announce(); | 2270 | rcu_bootup_announce(); |
2268 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | 2271 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); |
2269 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 2272 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
2270 | __rcu_init_preempt(); | 2273 | __rcu_init_preempt(); |
2271 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 2274 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
2272 | 2275 | ||
2273 | /* | 2276 | /* |
2274 | * We don't need protection against CPU-hotplug here because | 2277 | * We don't need protection against CPU-hotplug here because |
2275 | * this is called early in boot, before either interrupts | 2278 | * this is called early in boot, before either interrupts |
2276 | * or the scheduler are operational. | 2279 | * or the scheduler are operational. |
2277 | */ | 2280 | */ |
2278 | cpu_notifier(rcu_cpu_notify, 0); | 2281 | cpu_notifier(rcu_cpu_notify, 0); |
2279 | for_each_online_cpu(cpu) | 2282 | for_each_online_cpu(cpu) |
2280 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 2283 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
2281 | check_cpu_stall_init(); | 2284 | check_cpu_stall_init(); |
2282 | } | 2285 | } |
2283 | 2286 | ||
2284 | #include "rcutree_plugin.h" | 2287 | #include "rcutree_plugin.h" |
2285 | 2288 |
kernel/rcutree.h
1 | /* | 1 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
3 | * Internal non-public definitions. | 3 | * Internal non-public definitions. |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or | 7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. | 8 | * (at your option) any later version. |
9 | * | 9 | * |
10 | * This program is distributed in the hope that it will be useful, | 10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. | 13 | * GNU General Public License for more details. |
14 | * | 14 | * |
15 | * You should have received a copy of the GNU General Public License | 15 | * You should have received a copy of the GNU General Public License |
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
18 | * | 18 | * |
19 | * Copyright IBM Corporation, 2008 | 19 | * Copyright IBM Corporation, 2008 |
20 | * | 20 | * |
21 | * Author: Ingo Molnar <mingo@elte.hu> | 21 | * Author: Ingo Molnar <mingo@elte.hu> |
22 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/cache.h> | 25 | #include <linux/cache.h> |
26 | #include <linux/spinlock.h> | 26 | #include <linux/spinlock.h> |
27 | #include <linux/threads.h> | 27 | #include <linux/threads.h> |
28 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. | 32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. |
33 | * In theory, it should be possible to add more levels straightforwardly. | 33 | * In theory, it should be possible to add more levels straightforwardly. |
34 | * In practice, this did work well going from three levels to four. | 34 | * In practice, this did work well going from three levels to four. |
35 | * Of course, your mileage may vary. | 35 | * Of course, your mileage may vary. |
36 | */ | 36 | */ |
37 | #define MAX_RCU_LVLS 4 | 37 | #define MAX_RCU_LVLS 4 |
38 | #if CONFIG_RCU_FANOUT > 16 | 38 | #if CONFIG_RCU_FANOUT > 16 |
39 | #define RCU_FANOUT_LEAF 16 | 39 | #define RCU_FANOUT_LEAF 16 |
40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ | 40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ |
41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) | 41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) |
42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ | 42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ |
43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) | 43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) |
44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) | 44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) |
45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) | 45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) |
46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | 46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) |
47 | 47 | ||
48 | #if NR_CPUS <= RCU_FANOUT_1 | 48 | #if NR_CPUS <= RCU_FANOUT_1 |
49 | # define NUM_RCU_LVLS 1 | 49 | # define NUM_RCU_LVLS 1 |
50 | # define NUM_RCU_LVL_0 1 | 50 | # define NUM_RCU_LVL_0 1 |
51 | # define NUM_RCU_LVL_1 (NR_CPUS) | 51 | # define NUM_RCU_LVL_1 (NR_CPUS) |
52 | # define NUM_RCU_LVL_2 0 | 52 | # define NUM_RCU_LVL_2 0 |
53 | # define NUM_RCU_LVL_3 0 | 53 | # define NUM_RCU_LVL_3 0 |
54 | # define NUM_RCU_LVL_4 0 | 54 | # define NUM_RCU_LVL_4 0 |
55 | #elif NR_CPUS <= RCU_FANOUT_2 | 55 | #elif NR_CPUS <= RCU_FANOUT_2 |
56 | # define NUM_RCU_LVLS 2 | 56 | # define NUM_RCU_LVLS 2 |
57 | # define NUM_RCU_LVL_0 1 | 57 | # define NUM_RCU_LVL_0 1 |
58 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | 58 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
59 | # define NUM_RCU_LVL_2 (NR_CPUS) | 59 | # define NUM_RCU_LVL_2 (NR_CPUS) |
60 | # define NUM_RCU_LVL_3 0 | 60 | # define NUM_RCU_LVL_3 0 |
61 | # define NUM_RCU_LVL_4 0 | 61 | # define NUM_RCU_LVL_4 0 |
62 | #elif NR_CPUS <= RCU_FANOUT_3 | 62 | #elif NR_CPUS <= RCU_FANOUT_3 |
63 | # define NUM_RCU_LVLS 3 | 63 | # define NUM_RCU_LVLS 3 |
64 | # define NUM_RCU_LVL_0 1 | 64 | # define NUM_RCU_LVL_0 1 |
65 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | 65 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
66 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | 66 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
67 | # define NUM_RCU_LVL_3 (NR_CPUS) | 67 | # define NUM_RCU_LVL_3 (NR_CPUS) |
68 | # define NUM_RCU_LVL_4 0 | 68 | # define NUM_RCU_LVL_4 0 |
69 | #elif NR_CPUS <= RCU_FANOUT_4 | 69 | #elif NR_CPUS <= RCU_FANOUT_4 |
70 | # define NUM_RCU_LVLS 4 | 70 | # define NUM_RCU_LVLS 4 |
71 | # define NUM_RCU_LVL_0 1 | 71 | # define NUM_RCU_LVL_0 1 |
72 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) | 72 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) |
73 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | 73 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
74 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | 74 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
75 | # define NUM_RCU_LVL_4 (NR_CPUS) | 75 | # define NUM_RCU_LVL_4 (NR_CPUS) |
76 | #else | 76 | #else |
77 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" | 77 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" |
78 | #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ | 78 | #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ |
79 | 79 | ||
80 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) | 80 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) |
81 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) | 81 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) |
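
A worked example of these geometry macros (editorial; for NR_CPUS = 64 and CONFIG_RCU_FANOUT = 16):

	RCU_FANOUT_1 = 16, RCU_FANOUT_2 = 256, so the NR_CPUS <= RCU_FANOUT_2 case applies:
	NUM_RCU_LVLS  = 2
	NUM_RCU_LVL_0 = 1                         (root rcu_node)
	NUM_RCU_LVL_1 = DIV_ROUND_UP(64, 16) = 4  (leaf rcu_node structures)
	NUM_RCU_LVL_2 = 64                        (one rcu_data per CPU)
	RCU_SUM       = 1 + 4 + 64 + 0 + 0 = 69
	NUM_RCU_NODES = 69 - 64 = 5               (rcu_node structures in the tree)
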
82 | 82 | ||
83 | /* | 83 | /* |
84 | * Dynticks per-CPU state. | 84 | * Dynticks per-CPU state. |
85 | */ | 85 | */ |
86 | struct rcu_dynticks { | 86 | struct rcu_dynticks { |
87 | long long dynticks_nesting; /* Track irq/process nesting level. */ | 87 | long long dynticks_nesting; /* Track irq/process nesting level. */ |
88 | /* Process level is worth LLONG_MAX/2. */ | 88 | /* Process level is worth LLONG_MAX/2. */ |
89 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ | 89 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
90 | atomic_t dynticks; /* Even value for idle, else odd. */ | 90 | atomic_t dynticks; /* Even value for idle, else odd. */ |
91 | }; | 91 | }; |
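
Because the ->dynticks counter is even exactly when the CPU is in dyntick-idle mode, other CPUs can sample it to infer idleness without disturbing the target. A minimal editorial sketch (example_in_dyntick_idle() is a hypothetical helper, not part of this file):

	#include <linux/atomic.h>

	static int example_in_dyntick_idle(struct rcu_dynticks *rdtp)
	{
		/* atomic_add_return(0, ...) gives a fully ordered read of the counter. */
		return (atomic_add_return(0, &rdtp->dynticks) & 0x1) == 0;
	}
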
92 | 92 | ||
93 | /* RCU's kthread states for tracing. */ | 93 | /* RCU's kthread states for tracing. */ |
94 | #define RCU_KTHREAD_STOPPED 0 | 94 | #define RCU_KTHREAD_STOPPED 0 |
95 | #define RCU_KTHREAD_RUNNING 1 | 95 | #define RCU_KTHREAD_RUNNING 1 |
96 | #define RCU_KTHREAD_WAITING 2 | 96 | #define RCU_KTHREAD_WAITING 2 |
97 | #define RCU_KTHREAD_OFFCPU 3 | 97 | #define RCU_KTHREAD_OFFCPU 3 |
98 | #define RCU_KTHREAD_YIELDING 4 | 98 | #define RCU_KTHREAD_YIELDING 4 |
99 | #define RCU_KTHREAD_MAX 4 | 99 | #define RCU_KTHREAD_MAX 4 |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * Definition for node within the RCU grace-period-detection hierarchy. | 102 | * Definition for node within the RCU grace-period-detection hierarchy. |
103 | */ | 103 | */ |
104 | struct rcu_node { | 104 | struct rcu_node { |
105 | raw_spinlock_t lock; /* Root rcu_node's lock protects some */ | 105 | raw_spinlock_t lock; /* Root rcu_node's lock protects some */ |
106 | /* rcu_state fields as well as following. */ | 106 | /* rcu_state fields as well as following. */ |
107 | unsigned long gpnum; /* Current grace period for this node. */ | 107 | unsigned long gpnum; /* Current grace period for this node. */ |
108 | /* This will either be equal to or one */ | 108 | /* This will either be equal to or one */ |
109 | /* behind the root rcu_node's gpnum. */ | 109 | /* behind the root rcu_node's gpnum. */ |
110 | unsigned long completed; /* Last GP completed for this node. */ | 110 | unsigned long completed; /* Last GP completed for this node. */ |
111 | /* This will either be equal to or one */ | 111 | /* This will either be equal to or one */ |
112 | /* behind the root rcu_node's completed. */ | 112 | /* behind the root rcu_node's completed. */ |
113 | unsigned long qsmask; /* CPUs or groups that need to switch in */ | 113 | unsigned long qsmask; /* CPUs or groups that need to switch in */ |
114 | /* order for current grace period to proceed.*/ | 114 | /* order for current grace period to proceed.*/ |
115 | /* In leaf rcu_node, each bit corresponds to */ | 115 | /* In leaf rcu_node, each bit corresponds to */ |
116 | /* an rcu_data structure, otherwise, each */ | 116 | /* an rcu_data structure, otherwise, each */ |
117 | /* bit corresponds to a child rcu_node */ | 117 | /* bit corresponds to a child rcu_node */ |
118 | /* structure. */ | 118 | /* structure. */ |
119 | unsigned long expmask; /* Groups that have ->blkd_tasks */ | 119 | unsigned long expmask; /* Groups that have ->blkd_tasks */ |
120 | /* elements that need to drain to allow the */ | 120 | /* elements that need to drain to allow the */ |
121 | /* current expedited grace period to */ | 121 | /* current expedited grace period to */ |
122 | /* complete (only for TREE_PREEMPT_RCU). */ | 122 | /* complete (only for TREE_PREEMPT_RCU). */ |
123 | atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ | 123 | atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ |
124 | /* Since this has meaning only for leaf */ | 124 | /* Since this has meaning only for leaf */ |
125 | /* rcu_node structures, 32 bits suffices. */ | 125 | /* rcu_node structures, 32 bits suffices. */ |
126 | unsigned long qsmaskinit; | 126 | unsigned long qsmaskinit; |
127 | /* Per-GP initial value for qsmask & expmask. */ | 127 | /* Per-GP initial value for qsmask & expmask. */ |
128 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ | 128 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
129 | /* Only one bit will be set in this mask. */ | 129 | /* Only one bit will be set in this mask. */ |
130 | int grplo; /* lowest-numbered CPU or group here. */ | 130 | int grplo; /* lowest-numbered CPU or group here. */ |
131 | int grphi; /* highest-numbered CPU or group here. */ | 131 | int grphi; /* highest-numbered CPU or group here. */ |
132 | u8 grpnum; /* CPU/group number for next level up. */ | 132 | u8 grpnum; /* CPU/group number for next level up. */ |
133 | u8 level; /* root is at level 0. */ | 133 | u8 level; /* root is at level 0. */ |
134 | struct rcu_node *parent; | 134 | struct rcu_node *parent; |
135 | struct list_head blkd_tasks; | 135 | struct list_head blkd_tasks; |
136 | /* Tasks blocked in RCU read-side critical */ | 136 | /* Tasks blocked in RCU read-side critical */ |
137 | /* section. Tasks are placed at the head */ | 137 | /* section. Tasks are placed at the head */ |
138 | /* of this list and age towards the tail. */ | 138 | /* of this list and age towards the tail. */ |
139 | struct list_head *gp_tasks; | 139 | struct list_head *gp_tasks; |
140 | /* Pointer to the first task blocking the */ | 140 | /* Pointer to the first task blocking the */ |
141 | /* current grace period, or NULL if there */ | 141 | /* current grace period, or NULL if there */ |
142 | /* is no such task. */ | 142 | /* is no such task. */ |
143 | struct list_head *exp_tasks; | 143 | struct list_head *exp_tasks; |
144 | /* Pointer to the first task blocking the */ | 144 | /* Pointer to the first task blocking the */ |
145 | /* current expedited grace period, or NULL */ | 145 | /* current expedited grace period, or NULL */ |
146 | /* if there is no such task. If there */ | 146 | /* if there is no such task. If there */ |
147 | /* is no current expedited grace period, */ | 147 | /* is no current expedited grace period, */ |
148 | /* then there cannot be any such task. */ | 148 | /* then there cannot be any such task. */ |
149 | #ifdef CONFIG_RCU_BOOST | 149 | #ifdef CONFIG_RCU_BOOST |
150 | struct list_head *boost_tasks; | 150 | struct list_head *boost_tasks; |
151 | /* Pointer to first task that needs to be */ | 151 | /* Pointer to first task that needs to be */ |
152 | /* priority boosted, or NULL if no priority */ | 152 | /* priority boosted, or NULL if no priority */ |
153 | /* boosting is needed for this rcu_node */ | 153 | /* boosting is needed for this rcu_node */ |
154 | /* structure. If there are no tasks */ | 154 | /* structure. If there are no tasks */ |
155 | /* queued on this rcu_node structure that */ | 155 | /* queued on this rcu_node structure that */ |
156 | /* are blocking the current grace period, */ | 156 | /* are blocking the current grace period, */ |
157 | /* there can be no such task. */ | 157 | /* there can be no such task. */ |
158 | unsigned long boost_time; | 158 | unsigned long boost_time; |
159 | /* When to start boosting (jiffies). */ | 159 | /* When to start boosting (jiffies). */ |
160 | struct task_struct *boost_kthread_task; | 160 | struct task_struct *boost_kthread_task; |
161 | /* kthread that takes care of priority */ | 161 | /* kthread that takes care of priority */ |
162 | /* boosting for this rcu_node structure. */ | 162 | /* boosting for this rcu_node structure. */ |
163 | unsigned int boost_kthread_status; | 163 | unsigned int boost_kthread_status; |
164 | /* State of boost_kthread_task for tracing. */ | 164 | /* State of boost_kthread_task for tracing. */ |
165 | unsigned long n_tasks_boosted; | 165 | unsigned long n_tasks_boosted; |
166 | /* Total number of tasks boosted. */ | 166 | /* Total number of tasks boosted. */ |
167 | unsigned long n_exp_boosts; | 167 | unsigned long n_exp_boosts; |
168 | /* Number of tasks boosted for expedited GP. */ | 168 | /* Number of tasks boosted for expedited GP. */ |
169 | unsigned long n_normal_boosts; | 169 | unsigned long n_normal_boosts; |
170 | /* Number of tasks boosted for normal GP. */ | 170 | /* Number of tasks boosted for normal GP. */ |
171 | unsigned long n_balk_blkd_tasks; | 171 | unsigned long n_balk_blkd_tasks; |
172 | /* Refused to boost: no blocked tasks. */ | 172 | /* Refused to boost: no blocked tasks. */ |
173 | unsigned long n_balk_exp_gp_tasks; | 173 | unsigned long n_balk_exp_gp_tasks; |
174 | /* Refused to boost: nothing blocking GP. */ | 174 | /* Refused to boost: nothing blocking GP. */ |
175 | unsigned long n_balk_boost_tasks; | 175 | unsigned long n_balk_boost_tasks; |
176 | /* Refused to boost: already boosting. */ | 176 | /* Refused to boost: already boosting. */ |
177 | unsigned long n_balk_notblocked; | 177 | unsigned long n_balk_notblocked; |
178 | /* Refused to boost: RCU RS CS still running. */ | 178 | /* Refused to boost: RCU RS CS still running. */ |
179 | unsigned long n_balk_notyet; | 179 | unsigned long n_balk_notyet; |
180 | /* Refused to boost: not yet time. */ | 180 | /* Refused to boost: not yet time. */ |
181 | unsigned long n_balk_nos; | 181 | unsigned long n_balk_nos; |
182 | /* Refused to boost: not sure why, though. */ | 182 | /* Refused to boost: not sure why, though. */ |
183 | /* This can happen due to race conditions. */ | 183 | /* This can happen due to race conditions. */ |
184 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 184 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
185 | struct task_struct *node_kthread_task; | 185 | struct task_struct *node_kthread_task; |
186 | /* kthread that takes care of this rcu_node */ | 186 | /* kthread that takes care of this rcu_node */ |
187 | /* structure, for example, awakening the */ | 187 | /* structure, for example, awakening the */ |
188 | /* per-CPU kthreads as needed. */ | 188 | /* per-CPU kthreads as needed. */ |
189 | unsigned int node_kthread_status; | 189 | unsigned int node_kthread_status; |
190 | /* State of node_kthread_task for tracing. */ | 190 | /* State of node_kthread_task for tracing. */ |
191 | } ____cacheline_internodealigned_in_smp; | 191 | } ____cacheline_internodealigned_in_smp; |
192 | 192 | ||
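The ->blkd_tasks, ->gp_tasks, and ->exp_tasks comments above describe a single list whose tail end, starting at ->gp_tasks, holds the readers blocking the current grace period. As a hedged illustration (not code from this commit), the following sketch counts those readers, assuming the caller holds the rcu_node's ->lock so the pointers are stable:

static int rcu_count_gp_blockers(struct rcu_node *rnp)
{
	struct list_head *lp;
	int n = 0;

	if (rnp->gp_tasks == NULL)
		return 0;			/* Nothing blocks the current GP. */
	/* Entries from ->gp_tasks to the list head block the current GP. */
	for (lp = rnp->gp_tasks; lp != &rnp->blkd_tasks; lp = lp->next)
		n++;				/* One blocked reader task. */
	return n;
}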
193 | /* | 193 | /* |
194 | * Do a full breadth-first scan of the rcu_node structures for the | 194 | * Do a full breadth-first scan of the rcu_node structures for the |
195 | * specified rcu_state structure. | 195 | * specified rcu_state structure. |
196 | */ | 196 | */ |
197 | #define rcu_for_each_node_breadth_first(rsp, rnp) \ | 197 | #define rcu_for_each_node_breadth_first(rsp, rnp) \ |
198 | for ((rnp) = &(rsp)->node[0]; \ | 198 | for ((rnp) = &(rsp)->node[0]; \ |
199 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 199 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
200 | 200 | ||
201 | /* | 201 | /* |
202 | * Do a breadth-first scan of the non-leaf rcu_node structures for the | 202 | * Do a breadth-first scan of the non-leaf rcu_node structures for the |
203 | * specified rcu_state structure. Note that if there is a singleton | 203 | * specified rcu_state structure. Note that if there is a singleton |
204 | * rcu_node tree with but one rcu_node structure, this loop is a no-op. | 204 | * rcu_node tree with but one rcu_node structure, this loop is a no-op. |
205 | */ | 205 | */ |
206 | #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | 206 | #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ |
207 | for ((rnp) = &(rsp)->node[0]; \ | 207 | for ((rnp) = &(rsp)->node[0]; \ |
208 | (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) | 208 | (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) |
209 | 209 | ||
210 | /* | 210 | /* |
211 | * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | 211 | * Scan the leaves of the rcu_node hierarchy for the specified rcu_state |
212 | * structure. Note that if there is a singleton rcu_node tree with but | 212 | * structure. Note that if there is a singleton rcu_node tree with but |
213 | * one rcu_node structure, this loop -will- visit the rcu_node structure. | 213 | * one rcu_node structure, this loop -will- visit the rcu_node structure. |
214 | * It is still a leaf node, even if it is also the root node. | 214 | * It is still a leaf node, even if it is also the root node. |
215 | */ | 215 | */ |
216 | #define rcu_for_each_leaf_node(rsp, rnp) \ | 216 | #define rcu_for_each_leaf_node(rsp, rnp) \ |
217 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ | 217 | for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ |
218 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) | 218 | (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) |
219 | 219 | ||
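As a usage illustration of the scan macros above (a sketch, not part of this commit), the following walks the leaf rcu_node structures of an rcu_state and totals the CPUs that have not yet reported a quiescent state. It assumes the caller either holds each node's ->lock or tolerates a racy snapshot of ->qsmask:

static int rcu_count_unreported_cpus(struct rcu_state *rsp)
{
	struct rcu_node *rnp;
	int n = 0;

	rcu_for_each_leaf_node(rsp, rnp)
		n += hweight_long(rnp->qsmask);	/* CPUs still owing a QS. */
	return n;
}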
220 | /* Index values for nxttail array in struct rcu_data. */ | 220 | /* Index values for nxttail array in struct rcu_data. */ |
221 | #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ | 221 | #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ |
222 | #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ | 222 | #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ |
223 | #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ | 223 | #define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ |
224 | #define RCU_NEXT_TAIL 3 | 224 | #define RCU_NEXT_TAIL 3 |
225 | #define RCU_NEXT_SIZE 4 | 225 | #define RCU_NEXT_SIZE 4 |
226 | 226 | ||
227 | /* Per-CPU data for read-copy update. */ | 227 | /* Per-CPU data for read-copy update. */ |
228 | struct rcu_data { | 228 | struct rcu_data { |
229 | /* 1) quiescent-state and grace-period handling : */ | 229 | /* 1) quiescent-state and grace-period handling : */ |
230 | unsigned long completed; /* Track rsp->completed gp number */ | 230 | unsigned long completed; /* Track rsp->completed gp number */ |
231 | /* in order to detect GP end. */ | 231 | /* in order to detect GP end. */ |
232 | unsigned long gpnum; /* Highest gp number that this CPU */ | 232 | unsigned long gpnum; /* Highest gp number that this CPU */ |
233 | /* is aware of having started. */ | 233 | /* is aware of having started. */ |
234 | unsigned long passed_quiesce_gpnum; | 234 | unsigned long passed_quiesce_gpnum; |
235 | /* gpnum at time of quiescent state. */ | 235 | /* gpnum at time of quiescent state. */ |
236 | bool passed_quiesce; /* User-mode/idle loop etc. */ | 236 | bool passed_quiesce; /* User-mode/idle loop etc. */ |
237 | bool qs_pending; /* Core waits for quiesc state. */ | 237 | bool qs_pending; /* Core waits for quiesc state. */ |
238 | bool beenonline; /* CPU online at least once. */ | 238 | bool beenonline; /* CPU online at least once. */ |
239 | bool preemptible; /* Preemptible RCU? */ | 239 | bool preemptible; /* Preemptible RCU? */ |
240 | struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ | 240 | struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ |
241 | unsigned long grpmask; /* Mask to apply to leaf qsmask. */ | 241 | unsigned long grpmask; /* Mask to apply to leaf qsmask. */ |
242 | 242 | ||
243 | /* 2) batch handling */ | 243 | /* 2) batch handling */ |
244 | /* | 244 | /* |
245 | * If nxtlist is not NULL, it is partitioned as follows. | 245 | * If nxtlist is not NULL, it is partitioned as follows. |
246 | * Any of the partitions might be empty, in which case the | 246 | * Any of the partitions might be empty, in which case the |
247 | * pointer to that partition will be equal to the pointer for | 247 | * pointer to that partition will be equal to the pointer for |
248 | * the following partition. When the list is empty, all of | 248 | * the following partition. When the list is empty, all of |
249 | * the nxttail elements point to the ->nxtlist pointer itself, | 249 | * the nxttail elements point to the ->nxtlist pointer itself, |
250 | * which in that case is NULL. | 250 | * which in that case is NULL. |
251 | * | 251 | * |
252 | * [nxtlist, *nxttail[RCU_DONE_TAIL]): | 252 | * [nxtlist, *nxttail[RCU_DONE_TAIL]): |
253 | * Entries that batch # <= ->completed | 253 | * Entries that batch # <= ->completed |
254 | * The grace period for these entries has completed, and | 254 | * The grace period for these entries has completed, and |
255 | * the other grace-period-completed entries may be moved | 255 | * the other grace-period-completed entries may be moved |
256 | * here temporarily in rcu_process_callbacks(). | 256 | * here temporarily in rcu_process_callbacks(). |
257 | * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): | 257 | * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): |
258 | * Entries that batch # <= ->completed - 1: waiting for current GP | 258 | * Entries that batch # <= ->completed - 1: waiting for current GP |
259 | * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): | 259 | * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): |
260 | * Entries known to have arrived before current GP ended | 260 | * Entries known to have arrived before current GP ended |
261 | * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): | 261 | * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): |
262 | * Entries that might have arrived after current GP ended | 262 | * Entries that might have arrived after current GP ended |
263 | * Note that the value of *nxttail[RCU_NEXT_TAIL] will | 263 | * Note that the value of *nxttail[RCU_NEXT_TAIL] will |
264 | * always be NULL, as this is the end of the list. | 264 | * always be NULL, as this is the end of the list. |
265 | */ | 265 | */ |
266 | struct rcu_head *nxtlist; | 266 | struct rcu_head *nxtlist; |
267 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; | 267 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; |
268 | long qlen; /* # of queued callbacks */ | 268 | long qlen; /* # of queued callbacks */ |
269 | long qlen_last_fqs_check; | 269 | long qlen_last_fqs_check; |
270 | /* qlen at last check for QS forcing */ | 270 | /* qlen at last check for QS forcing */ |
271 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | 271 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ |
272 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ | 272 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ |
273 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ | 273 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ |
274 | unsigned long n_force_qs_snap; | 274 | unsigned long n_force_qs_snap; |
275 | /* did other CPU force QS recently? */ | 275 | /* did other CPU force QS recently? */ |
276 | long blimit; /* Upper limit on a processed batch */ | 276 | long blimit; /* Upper limit on a processed batch */ |
277 | 277 | ||
278 | /* 3) dynticks interface. */ | 278 | /* 3) dynticks interface. */ |
279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ | 279 | struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ |
280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ | 280 | int dynticks_snap; /* Per-GP tracking for dynticks. */ |
281 | 281 | ||
282 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ | 282 | /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ |
283 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ | 283 | unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ |
284 | unsigned long offline_fqs; /* Kicked due to being offline. */ | 284 | unsigned long offline_fqs; /* Kicked due to being offline. */ |
285 | unsigned long resched_ipi; /* Sent a resched IPI. */ | 285 | unsigned long resched_ipi; /* Sent a resched IPI. */ |
286 | 286 | ||
287 | /* 5) __rcu_pending() statistics. */ | 287 | /* 5) __rcu_pending() statistics. */ |
288 | unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ | 288 | unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ |
289 | unsigned long n_rp_qs_pending; | 289 | unsigned long n_rp_qs_pending; |
290 | unsigned long n_rp_report_qs; | 290 | unsigned long n_rp_report_qs; |
291 | unsigned long n_rp_cb_ready; | 291 | unsigned long n_rp_cb_ready; |
292 | unsigned long n_rp_cpu_needs_gp; | 292 | unsigned long n_rp_cpu_needs_gp; |
293 | unsigned long n_rp_gp_completed; | 293 | unsigned long n_rp_gp_completed; |
294 | unsigned long n_rp_gp_started; | 294 | unsigned long n_rp_gp_started; |
295 | unsigned long n_rp_need_fqs; | 295 | unsigned long n_rp_need_fqs; |
296 | unsigned long n_rp_need_nothing; | 296 | unsigned long n_rp_need_nothing; |
297 | 297 | ||
298 | int cpu; | 298 | int cpu; |
299 | struct rcu_state *rsp; | 299 | struct rcu_state *rsp; |
300 | }; | 300 | }; |
301 | 301 | ||
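To make the ->nxtlist/->nxttail[] partitioning described above concrete, here is a hedged sketch (not from this commit) that counts the callbacks in the RCU_DONE segment, that is, the half-open range [nxtlist, *nxttail[RCU_DONE_TAIL]) whose grace period has already completed:

static long rcu_count_done_cbs(struct rcu_data *rdp)
{
	struct rcu_head *rhp;
	long n = 0;

	/* If the list is empty, *nxttail[RCU_DONE_TAIL] == nxtlist == NULL. */
	for (rhp = rdp->nxtlist; rhp != *rdp->nxttail[RCU_DONE_TAIL];
	     rhp = rhp->next)
		n++;				/* One callback ready to invoke. */
	return n;
}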
302 | /* Values for fqs_state field in struct rcu_state. */ | 302 | /* Values for fqs_state field in struct rcu_state. */ |
303 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ | 303 | #define RCU_GP_IDLE 0 /* No grace period in progress. */ |
304 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ | 304 | #define RCU_GP_INIT 1 /* Grace period being initialized. */ |
305 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ | 305 | #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ |
306 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ | 306 | #define RCU_FORCE_QS 3 /* Need to force quiescent state. */ |
307 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK | 307 | #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK |
308 | 308 | ||
309 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ | 309 | #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ |
310 | 310 | ||
311 | #ifdef CONFIG_PROVE_RCU | 311 | #ifdef CONFIG_PROVE_RCU |
312 | #define RCU_STALL_DELAY_DELTA (5 * HZ) | 312 | #define RCU_STALL_DELAY_DELTA (5 * HZ) |
313 | #else | 313 | #else |
314 | #define RCU_STALL_DELAY_DELTA 0 | 314 | #define RCU_STALL_DELAY_DELTA 0 |
315 | #endif | 315 | #endif |
316 | 316 | ||
317 | #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ | 317 | #define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ |
318 | RCU_STALL_DELAY_DELTA) | 318 | RCU_STALL_DELAY_DELTA) |
319 | /* for rsp->jiffies_stall */ | 319 | /* for rsp->jiffies_stall */ |
320 | #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) | 320 | #define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) |
321 | /* for rsp->jiffies_stall */ | 321 | /* for rsp->jiffies_stall */ |
322 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ | 322 | #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ |
323 | /* to take at least one */ | 323 | /* to take at least one */ |
324 | /* scheduling clock irq */ | 324 | /* scheduling clock irq */ |
325 | /* before ratting on them. */ | 325 | /* before ratting on them. */ |
326 | 326 | ||
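A worked example of the stall-timeout arithmetic above, using illustrative values that are assumptions rather than anything taken from this commit:

/*
 * Assuming CONFIG_RCU_CPU_STALL_TIMEOUT=60, HZ=1000, and CONFIG_PROVE_RCU=y:
 *
 *	RCU_STALL_DELAY_DELTA          = 5 * 1000          =   5000 jiffies
 *	RCU_SECONDS_TILL_STALL_CHECK   = 60 * 1000 + 5000  =  65000 jiffies (65 s)
 *	RCU_SECONDS_TILL_STALL_RECHECK = 3 * 65000 + 30    = 195030 jiffies (~195 s)
 */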
327 | #define rcu_wait(cond) \ | 327 | #define rcu_wait(cond) \ |
328 | do { \ | 328 | do { \ |
329 | for (;;) { \ | 329 | for (;;) { \ |
330 | set_current_state(TASK_INTERRUPTIBLE); \ | 330 | set_current_state(TASK_INTERRUPTIBLE); \ |
331 | if (cond) \ | 331 | if (cond) \ |
332 | break; \ | 332 | break; \ |
333 | schedule(); \ | 333 | schedule(); \ |
334 | } \ | 334 | } \ |
335 | __set_current_state(TASK_RUNNING); \ | 335 | __set_current_state(TASK_RUNNING); \ |
336 | } while (0) | 336 | } while (0) |
337 | 337 | ||
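A hedged usage sketch for the rcu_wait() helper above; the flag and the kthread are hypothetical, for illustration only. Because the condition is re-evaluated around each schedule(), the kthread sleeps until the flag becomes true (or the kthread is asked to stop):

static bool example_work_ready;		/* hypothetical flag, illustration only */

static int example_rcu_kthread(void *unused)
{
	while (!kthread_should_stop()) {
		rcu_wait(example_work_ready || kthread_should_stop());
		example_work_ready = false;
		/* ... process the pending work here ... */
	}
	return 0;
}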
338 | /* | 338 | /* |
339 | * RCU global state, including node hierarchy. This hierarchy is | 339 | * RCU global state, including node hierarchy. This hierarchy is |
340 | * represented in "heap" form in a dense array. The root (first level) | 340 | * represented in "heap" form in a dense array. The root (first level) |
341 | * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second | 341 | * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second |
342 | * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), | 342 | * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), |
343 | * and the third level in ->node[m+1] and following (->node[m+1] referenced | 343 | * and the third level in ->node[m+1] and following (->node[m+1] referenced |
344 | * by ->level[2]). The number of levels is determined by the number of | 344 | * by ->level[2]). The number of levels is determined by the number of |
345 | * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" | 345 | * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" |
346 | * consisting of a single rcu_node. | 346 | * consisting of a single rcu_node. |
347 | */ | 347 | */ |
348 | struct rcu_state { | 348 | struct rcu_state { |
349 | struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ | 349 | struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ |
350 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ | 350 | struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ |
351 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ | 351 | u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ |
352 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ | 352 | u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ |
353 | struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ | 353 | struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */ |
354 | 354 | ||
355 | /* The following fields are guarded by the root rcu_node's lock. */ | 355 | /* The following fields are guarded by the root rcu_node's lock. */ |
356 | 356 | ||
357 | u8 fqs_state ____cacheline_internodealigned_in_smp; | 357 | u8 fqs_state ____cacheline_internodealigned_in_smp; |
358 | /* Force QS state. */ | 358 | /* Force QS state. */ |
359 | u8 fqs_active; /* force_quiescent_state() */ | 359 | u8 fqs_active; /* force_quiescent_state() */ |
360 | /* is running. */ | 360 | /* is running. */ |
361 | u8 fqs_need_gp; /* A CPU was prevented from */ | 361 | u8 fqs_need_gp; /* A CPU was prevented from */ |
362 | /* starting a new grace */ | 362 | /* starting a new grace */ |
363 | /* period because */ | 363 | /* period because */ |
364 | /* force_quiescent_state() */ | 364 | /* force_quiescent_state() */ |
365 | /* was running. */ | 365 | /* was running. */ |
366 | u8 boost; /* Subject to priority boost. */ | 366 | u8 boost; /* Subject to priority boost. */ |
367 | unsigned long gpnum; /* Current gp number. */ | 367 | unsigned long gpnum; /* Current gp number. */ |
368 | unsigned long completed; /* # of last completed gp. */ | 368 | unsigned long completed; /* # of last completed gp. */ |
369 | 369 | ||
370 | /* End of fields guarded by root rcu_node's lock. */ | 370 | /* End of fields guarded by root rcu_node's lock. */ |
371 | 371 | ||
372 | raw_spinlock_t onofflock; /* exclude on/offline and */ | 372 | raw_spinlock_t onofflock; /* exclude on/offline and */ |
373 | /* starting new GP. */ | 373 | /* starting new GP. */ |
374 | raw_spinlock_t fqslock; /* Only one task forcing */ | 374 | raw_spinlock_t fqslock; /* Only one task forcing */ |
375 | /* quiescent states. */ | 375 | /* quiescent states. */ |
376 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 376 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
377 | /* force_quiescent_state(). */ | 377 | /* force_quiescent_state(). */ |
378 | unsigned long n_force_qs; /* Number of calls to */ | 378 | unsigned long n_force_qs; /* Number of calls to */ |
379 | /* force_quiescent_state(). */ | 379 | /* force_quiescent_state(). */ |
380 | unsigned long n_force_qs_lh; /* ~Number of calls leaving */ | 380 | unsigned long n_force_qs_lh; /* ~Number of calls leaving */ |
381 | /* due to lock unavailable. */ | 381 | /* due to lock unavailable. */ |
382 | unsigned long n_force_qs_ngp; /* Number of calls leaving */ | 382 | unsigned long n_force_qs_ngp; /* Number of calls leaving */ |
383 | /* due to no GP active. */ | 383 | /* due to no GP active. */ |
384 | unsigned long gp_start; /* Time at which GP started, */ | 384 | unsigned long gp_start; /* Time at which GP started, */ |
385 | /* but in jiffies. */ | 385 | /* but in jiffies. */ |
386 | unsigned long jiffies_stall; /* Time at which to check */ | 386 | unsigned long jiffies_stall; /* Time at which to check */ |
387 | /* for CPU stalls. */ | 387 | /* for CPU stalls. */ |
388 | unsigned long gp_max; /* Maximum GP duration in */ | 388 | unsigned long gp_max; /* Maximum GP duration in */ |
389 | /* jiffies. */ | 389 | /* jiffies. */ |
390 | char *name; /* Name of structure. */ | 390 | char *name; /* Name of structure. */ |
391 | }; | 391 | }; |
392 | 392 | ||
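To illustrate the "heap form" layout described in the comment above (a sketch, not code from this commit): each ->level[l] points into the dense ->node[] array at the first rcu_node of level l, so the array offset of every level can be printed directly.

static void rcu_dump_level_offsets(struct rcu_state *rsp)
{
	int l;

	for (l = 0; l < NUM_RCU_LVLS; l++)
		printk(KERN_INFO "%s: level %d begins at node[%ld]\n",
		       rsp->name, l,
		       (long)(rsp->level[l] - rsp->node));
}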
393 | /* Return values for rcu_preempt_offline_tasks(). */ | 393 | /* Return values for rcu_preempt_offline_tasks(). */ |
394 | 394 | ||
395 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ | 395 | #define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ |
396 | /* GP were moved to root. */ | 396 | /* GP were moved to root. */ |
397 | #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ | 397 | #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ |
398 | /* GP were moved to root. */ | 398 | /* GP were moved to root. */ |
399 | 399 | ||
400 | /* | 400 | /* |
401 | * RCU implementation internal declarations: | 401 | * RCU implementation internal declarations: |
402 | */ | 402 | */ |
403 | extern struct rcu_state rcu_sched_state; | 403 | extern struct rcu_state rcu_sched_state; |
404 | DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); | 404 | DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); |
405 | 405 | ||
406 | extern struct rcu_state rcu_bh_state; | 406 | extern struct rcu_state rcu_bh_state; |
407 | DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); | 407 | DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); |
408 | 408 | ||
409 | #ifdef CONFIG_TREE_PREEMPT_RCU | 409 | #ifdef CONFIG_TREE_PREEMPT_RCU |
410 | extern struct rcu_state rcu_preempt_state; | 410 | extern struct rcu_state rcu_preempt_state; |
411 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | 411 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); |
412 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 412 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
413 | 413 | ||
414 | #ifdef CONFIG_RCU_BOOST | 414 | #ifdef CONFIG_RCU_BOOST |
415 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 415 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
416 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | 416 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); |
417 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | 417 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); |
418 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | 418 | DECLARE_PER_CPU(char, rcu_cpu_has_work); |
419 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 419 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
420 | 420 | ||
421 | #ifndef RCU_TREE_NONCORE | 421 | #ifndef RCU_TREE_NONCORE |
422 | 422 | ||
423 | /* Forward declarations for rcutree_plugin.h */ | 423 | /* Forward declarations for rcutree_plugin.h */ |
424 | static void rcu_bootup_announce(void); | 424 | static void rcu_bootup_announce(void); |
425 | long rcu_batches_completed(void); | 425 | long rcu_batches_completed(void); |
426 | static void rcu_preempt_note_context_switch(int cpu); | 426 | static void rcu_preempt_note_context_switch(int cpu); |
427 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 427 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); |
428 | #ifdef CONFIG_HOTPLUG_CPU | 428 | #ifdef CONFIG_HOTPLUG_CPU |
429 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 429 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
430 | unsigned long flags); | 430 | unsigned long flags); |
431 | static void rcu_stop_cpu_kthread(int cpu); | 431 | static void rcu_stop_cpu_kthread(int cpu); |
432 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 432 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
433 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); | 433 | static void rcu_print_detail_task_stall(struct rcu_state *rsp); |
434 | static int rcu_print_task_stall(struct rcu_node *rnp); | 434 | static int rcu_print_task_stall(struct rcu_node *rnp); |
435 | static void rcu_preempt_stall_reset(void); | 435 | static void rcu_preempt_stall_reset(void); |
436 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 436 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); |
437 | #ifdef CONFIG_HOTPLUG_CPU | 437 | #ifdef CONFIG_HOTPLUG_CPU |
438 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | 438 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
439 | struct rcu_node *rnp, | 439 | struct rcu_node *rnp, |
440 | struct rcu_data *rdp); | 440 | struct rcu_data *rdp); |
441 | static void rcu_preempt_offline_cpu(int cpu); | 441 | static void rcu_preempt_offline_cpu(int cpu); |
442 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 442 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
443 | static void rcu_preempt_check_callbacks(int cpu); | 443 | static void rcu_preempt_check_callbacks(int cpu); |
444 | static void rcu_preempt_process_callbacks(void); | 444 | static void rcu_preempt_process_callbacks(void); |
445 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 445 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
446 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 446 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) |
447 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 447 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
448 | bool wake); | 448 | bool wake); |
449 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 449 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ |
450 | static int rcu_preempt_pending(int cpu); | 450 | static int rcu_preempt_pending(int cpu); |
451 | static int rcu_preempt_needs_cpu(int cpu); | 451 | static int rcu_preempt_needs_cpu(int cpu); |
452 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 452 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
453 | static void rcu_preempt_send_cbs_to_online(void); | 453 | static void rcu_preempt_send_cbs_to_online(void); |
454 | static void __init __rcu_init_preempt(void); | 454 | static void __init __rcu_init_preempt(void); |
455 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 455 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
456 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 456 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
457 | static void invoke_rcu_callbacks_kthread(void); | 457 | static void invoke_rcu_callbacks_kthread(void); |
458 | #ifdef CONFIG_RCU_BOOST | 458 | #ifdef CONFIG_RCU_BOOST |
459 | static void rcu_preempt_do_callbacks(void); | 459 | static void rcu_preempt_do_callbacks(void); |
460 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 460 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
461 | cpumask_var_t cm); | 461 | cpumask_var_t cm); |
462 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 462 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
463 | struct rcu_node *rnp, | 463 | struct rcu_node *rnp, |
464 | int rnp_index); | 464 | int rnp_index); |
465 | static void invoke_rcu_node_kthread(struct rcu_node *rnp); | 465 | static void invoke_rcu_node_kthread(struct rcu_node *rnp); |
466 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg); | 466 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg); |
467 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 467 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
468 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); | 468 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt); |
469 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 469 | static void __cpuinit rcu_prepare_kthreads(int cpu); |
470 | static void rcu_prepare_for_idle_init(int cpu); | ||
471 | static void rcu_cleanup_after_idle(int cpu); | ||
470 | static void rcu_prepare_for_idle(int cpu); | 472 | static void rcu_prepare_for_idle(int cpu); |
471 | 473 | ||
472 | #endif /* #ifndef RCU_TREE_NONCORE */ | 474 | #endif /* #ifndef RCU_TREE_NONCORE */ |
473 | 475 |
kernel/rcutree_plugin.h
1 | /* | 1 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
3 | * Internal non-public definitions that provide either classic | 3 | * Internal non-public definitions that provide either classic |
4 | * or preemptible semantics. | 4 | * or preemptible semantics. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. | 9 | * (at your option) any later version. |
10 | * | 10 | * |
11 | * This program is distributed in the hope that it will be useful, | 11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. | 14 | * GNU General Public License for more details. |
15 | * | 15 | * |
16 | * You should have received a copy of the GNU General Public License | 16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
19 | * | 19 | * |
20 | * Copyright Red Hat, 2009 | 20 | * Copyright Red Hat, 2009 |
21 | * Copyright IBM Corporation, 2009 | 21 | * Copyright IBM Corporation, 2009 |
22 | * | 22 | * |
23 | * Author: Ingo Molnar <mingo@elte.hu> | 23 | * Author: Ingo Molnar <mingo@elte.hu> |
24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | #include <linux/stop_machine.h> | 28 | #include <linux/stop_machine.h> |
29 | 29 | ||
30 | #define RCU_KTHREAD_PRIO 1 | 30 | #define RCU_KTHREAD_PRIO 1 |
31 | 31 | ||
32 | #ifdef CONFIG_RCU_BOOST | 32 | #ifdef CONFIG_RCU_BOOST |
33 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | 33 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO |
34 | #else | 34 | #else |
35 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | 35 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | /* | 38 | /* |
39 | * Check the RCU kernel configuration parameters and print informative | 39 | * Check the RCU kernel configuration parameters and print informative |
40 | * messages about anything out of the ordinary. If you like #ifdef, you | 40 | * messages about anything out of the ordinary. If you like #ifdef, you |
41 | * will love this function. | 41 | * will love this function. |
42 | */ | 42 | */ |
43 | static void __init rcu_bootup_announce_oddness(void) | 43 | static void __init rcu_bootup_announce_oddness(void) |
44 | { | 44 | { |
45 | #ifdef CONFIG_RCU_TRACE | 45 | #ifdef CONFIG_RCU_TRACE |
46 | printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); | 46 | printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); |
47 | #endif | 47 | #endif |
48 | #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) | 48 | #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) |
49 | printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", | 49 | printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", |
50 | CONFIG_RCU_FANOUT); | 50 | CONFIG_RCU_FANOUT); |
51 | #endif | 51 | #endif |
52 | #ifdef CONFIG_RCU_FANOUT_EXACT | 52 | #ifdef CONFIG_RCU_FANOUT_EXACT |
53 | printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); | 53 | printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); |
54 | #endif | 54 | #endif |
55 | #ifdef CONFIG_RCU_FAST_NO_HZ | 55 | #ifdef CONFIG_RCU_FAST_NO_HZ |
56 | printk(KERN_INFO | 56 | printk(KERN_INFO |
57 | "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); | 57 | "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); |
58 | #endif | 58 | #endif |
59 | #ifdef CONFIG_PROVE_RCU | 59 | #ifdef CONFIG_PROVE_RCU |
60 | printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); | 60 | printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); |
61 | #endif | 61 | #endif |
62 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | 62 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE |
63 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); | 63 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); |
64 | #endif | 64 | #endif |
65 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) | 65 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
66 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 66 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); |
67 | #endif | 67 | #endif |
68 | #if NUM_RCU_LVL_4 != 0 | 68 | #if NUM_RCU_LVL_4 != 0 |
69 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); | 69 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); |
70 | #endif | 70 | #endif |
71 | } | 71 | } |
72 | 72 | ||
73 | #ifdef CONFIG_TREE_PREEMPT_RCU | 73 | #ifdef CONFIG_TREE_PREEMPT_RCU |
74 | 74 | ||
75 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); | 75 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); |
76 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 76 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
77 | static struct rcu_state *rcu_state = &rcu_preempt_state; | 77 | static struct rcu_state *rcu_state = &rcu_preempt_state; |
78 | 78 | ||
79 | static void rcu_read_unlock_special(struct task_struct *t); | 79 | static void rcu_read_unlock_special(struct task_struct *t); |
80 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | 80 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Tell them what RCU they are running. | 83 | * Tell them what RCU they are running. |
84 | */ | 84 | */ |
85 | static void __init rcu_bootup_announce(void) | 85 | static void __init rcu_bootup_announce(void) |
86 | { | 86 | { |
87 | printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); | 87 | printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); |
88 | rcu_bootup_announce_oddness(); | 88 | rcu_bootup_announce_oddness(); |
89 | } | 89 | } |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Return the number of RCU-preempt batches processed thus far | 92 | * Return the number of RCU-preempt batches processed thus far |
93 | * for debug and statistics. | 93 | * for debug and statistics. |
94 | */ | 94 | */ |
95 | long rcu_batches_completed_preempt(void) | 95 | long rcu_batches_completed_preempt(void) |
96 | { | 96 | { |
97 | return rcu_preempt_state.completed; | 97 | return rcu_preempt_state.completed; |
98 | } | 98 | } |
99 | EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); | 99 | EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * Return the number of RCU batches processed thus far for debug & stats. | 102 | * Return the number of RCU batches processed thus far for debug & stats. |
103 | */ | 103 | */ |
104 | long rcu_batches_completed(void) | 104 | long rcu_batches_completed(void) |
105 | { | 105 | { |
106 | return rcu_batches_completed_preempt(); | 106 | return rcu_batches_completed_preempt(); |
107 | } | 107 | } |
108 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 108 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Force a quiescent state for preemptible RCU. | 111 | * Force a quiescent state for preemptible RCU. |
112 | */ | 112 | */ |
113 | void rcu_force_quiescent_state(void) | 113 | void rcu_force_quiescent_state(void) |
114 | { | 114 | { |
115 | force_quiescent_state(&rcu_preempt_state, 0); | 115 | force_quiescent_state(&rcu_preempt_state, 0); |
116 | } | 116 | } |
117 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 117 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
118 | 118 | ||
119 | /* | 119 | /* |
120 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | 120 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
121 | * that this does not necessarily mean that the task currently running | 121 | * that this does not necessarily mean that the task currently running |
122 | * on the CPU is in a quiescent state: there might be any number of tasks blocked | 122 | * on the CPU is in a quiescent state: there might be any number of tasks blocked |
123 | * while in an RCU read-side critical section. | 123 | * while in an RCU read-side critical section. |
124 | * | 124 | * |
125 | * Unlike the other rcu_*_qs() functions, callers to this function | 125 | * Unlike the other rcu_*_qs() functions, callers to this function |
126 | * must disable irqs in order to protect the assignment to | 126 | * must disable irqs in order to protect the assignment to |
127 | * ->rcu_read_unlock_special. | 127 | * ->rcu_read_unlock_special. |
128 | */ | 128 | */ |
129 | static void rcu_preempt_qs(int cpu) | 129 | static void rcu_preempt_qs(int cpu) |
130 | { | 130 | { |
131 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 131 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
132 | 132 | ||
133 | rdp->passed_quiesce_gpnum = rdp->gpnum; | 133 | rdp->passed_quiesce_gpnum = rdp->gpnum; |
134 | barrier(); | 134 | barrier(); |
135 | if (rdp->passed_quiesce == 0) | 135 | if (rdp->passed_quiesce == 0) |
136 | trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); | 136 | trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); |
137 | rdp->passed_quiesce = 1; | 137 | rdp->passed_quiesce = 1; |
138 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 138 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
139 | } | 139 | } |
140 | 140 | ||
141 | /* | 141 | /* |
142 | * We have entered the scheduler, and the current task might soon be | 142 | * We have entered the scheduler, and the current task might soon be |
143 | * context-switched away from. If this task is in an RCU read-side | 143 | * context-switched away from. If this task is in an RCU read-side |
144 | * critical section, we will no longer be able to rely on the CPU to | 144 | * critical section, we will no longer be able to rely on the CPU to |
145 | * record that fact, so we enqueue the task on the blkd_tasks list. | 145 | * record that fact, so we enqueue the task on the blkd_tasks list. |
146 | * The task will dequeue itself when it exits the outermost enclosing | 146 | * The task will dequeue itself when it exits the outermost enclosing |
147 | * RCU read-side critical section. Therefore, the current grace period | 147 | * RCU read-side critical section. Therefore, the current grace period |
148 | * cannot be permitted to complete until the blkd_tasks list entries | 148 | * cannot be permitted to complete until the blkd_tasks list entries |
149 | * predating the current grace period drain, in other words, until | 149 | * predating the current grace period drain, in other words, until |
150 | * rnp->gp_tasks becomes NULL. | 150 | * rnp->gp_tasks becomes NULL. |
151 | * | 151 | * |
152 | * Caller must disable preemption. | 152 | * Caller must disable preemption. |
153 | */ | 153 | */ |
154 | static void rcu_preempt_note_context_switch(int cpu) | 154 | static void rcu_preempt_note_context_switch(int cpu) |
155 | { | 155 | { |
156 | struct task_struct *t = current; | 156 | struct task_struct *t = current; |
157 | unsigned long flags; | 157 | unsigned long flags; |
158 | struct rcu_data *rdp; | 158 | struct rcu_data *rdp; |
159 | struct rcu_node *rnp; | 159 | struct rcu_node *rnp; |
160 | 160 | ||
161 | if (t->rcu_read_lock_nesting > 0 && | 161 | if (t->rcu_read_lock_nesting > 0 && |
162 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 162 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
163 | 163 | ||
164 | /* Possibly blocking in an RCU read-side critical section. */ | 164 | /* Possibly blocking in an RCU read-side critical section. */ |
165 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); | 165 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); |
166 | rnp = rdp->mynode; | 166 | rnp = rdp->mynode; |
167 | raw_spin_lock_irqsave(&rnp->lock, flags); | 167 | raw_spin_lock_irqsave(&rnp->lock, flags); |
168 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 168 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
169 | t->rcu_blocked_node = rnp; | 169 | t->rcu_blocked_node = rnp; |
170 | 170 | ||
171 | /* | 171 | /* |
172 | * If this CPU has already checked in, then this task | 172 | * If this CPU has already checked in, then this task |
173 | * will hold up the next grace period rather than the | 173 | * will hold up the next grace period rather than the |
174 | * current grace period. Queue the task accordingly. | 174 | * current grace period. Queue the task accordingly. |
175 | * If the task is queued for the current grace period | 175 | * If the task is queued for the current grace period |
176 | * (i.e., this CPU has not yet passed through a quiescent | 176 | * (i.e., this CPU has not yet passed through a quiescent |
177 | * state for the current grace period), then as long | 177 | * state for the current grace period), then as long |
178 | * as that task remains queued, the current grace period | 178 | * as that task remains queued, the current grace period |
179 | * cannot end. Note that there is some uncertainty as | 179 | * cannot end. Note that there is some uncertainty as |
180 | * to exactly when the current grace period started. | 180 | * to exactly when the current grace period started. |
181 | * We take a conservative approach, which can result | 181 | * We take a conservative approach, which can result |
182 | * in unnecessarily waiting on tasks that started very | 182 | * in unnecessarily waiting on tasks that started very |
183 | * slightly after the current grace period began. C'est | 183 | * slightly after the current grace period began. C'est |
184 | * la vie!!! | 184 | * la vie!!! |
185 | * | 185 | * |
186 | * But first, note that the current CPU must still be | 186 | * But first, note that the current CPU must still be |
187 | * on line! | 187 | * on line! |
188 | */ | 188 | */ |
189 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); | 189 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); |
190 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 190 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); |
191 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { | 191 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { |
192 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); | 192 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); |
193 | rnp->gp_tasks = &t->rcu_node_entry; | 193 | rnp->gp_tasks = &t->rcu_node_entry; |
194 | #ifdef CONFIG_RCU_BOOST | 194 | #ifdef CONFIG_RCU_BOOST |
195 | if (rnp->boost_tasks != NULL) | 195 | if (rnp->boost_tasks != NULL) |
196 | rnp->boost_tasks = rnp->gp_tasks; | 196 | rnp->boost_tasks = rnp->gp_tasks; |
197 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 197 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
198 | } else { | 198 | } else { |
199 | list_add(&t->rcu_node_entry, &rnp->blkd_tasks); | 199 | list_add(&t->rcu_node_entry, &rnp->blkd_tasks); |
200 | if (rnp->qsmask & rdp->grpmask) | 200 | if (rnp->qsmask & rdp->grpmask) |
201 | rnp->gp_tasks = &t->rcu_node_entry; | 201 | rnp->gp_tasks = &t->rcu_node_entry; |
202 | } | 202 | } |
203 | trace_rcu_preempt_task(rdp->rsp->name, | 203 | trace_rcu_preempt_task(rdp->rsp->name, |
204 | t->pid, | 204 | t->pid, |
205 | (rnp->qsmask & rdp->grpmask) | 205 | (rnp->qsmask & rdp->grpmask) |
206 | ? rnp->gpnum | 206 | ? rnp->gpnum |
207 | : rnp->gpnum + 1); | 207 | : rnp->gpnum + 1); |
208 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 208 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
209 | } else if (t->rcu_read_lock_nesting < 0 && | 209 | } else if (t->rcu_read_lock_nesting < 0 && |
210 | t->rcu_read_unlock_special) { | 210 | t->rcu_read_unlock_special) { |
211 | 211 | ||
212 | /* | 212 | /* |
213 | * Complete exit from RCU read-side critical section on | 213 | * Complete exit from RCU read-side critical section on |
214 | * behalf of preempted instance of __rcu_read_unlock(). | 214 | * behalf of preempted instance of __rcu_read_unlock(). |
215 | */ | 215 | */ |
216 | rcu_read_unlock_special(t); | 216 | rcu_read_unlock_special(t); |
217 | } | 217 | } |
218 | 218 | ||
219 | /* | 219 | /* |
220 | * Either we were not in an RCU read-side critical section to | 220 | * Either we were not in an RCU read-side critical section to |
221 | * begin with, or we have now recorded that critical section | 221 | * begin with, or we have now recorded that critical section |
222 | * globally. Either way, we can now note a quiescent state | 222 | * globally. Either way, we can now note a quiescent state |
223 | * for this CPU. Again, if we were in an RCU read-side critical | 223 | * for this CPU. Again, if we were in an RCU read-side critical |
224 | * section, and if that critical section was blocking the current | 224 | * section, and if that critical section was blocking the current |
225 | * grace period, then the fact that the task has been enqueued | 225 | * grace period, then the fact that the task has been enqueued |
226 | * means that we continue to block the current grace period. | 226 | * means that we continue to block the current grace period. |
227 | */ | 227 | */ |
228 | local_irq_save(flags); | 228 | local_irq_save(flags); |
229 | rcu_preempt_qs(cpu); | 229 | rcu_preempt_qs(cpu); |
230 | local_irq_restore(flags); | 230 | local_irq_restore(flags); |
231 | } | 231 | } |
232 | 232 | ||
233 | /* | 233 | /* |
234 | * Tree-preemptible RCU implementation for rcu_read_lock(). | 234 | * Tree-preemptible RCU implementation for rcu_read_lock(). |
235 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | 235 | * Just increment ->rcu_read_lock_nesting, shared state will be updated |
236 | * if we block. | 236 | * if we block. |
237 | */ | 237 | */ |
238 | void __rcu_read_lock(void) | 238 | void __rcu_read_lock(void) |
239 | { | 239 | { |
240 | current->rcu_read_lock_nesting++; | 240 | current->rcu_read_lock_nesting++; |
241 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ | 241 | barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ |
242 | } | 242 | } |
243 | EXPORT_SYMBOL_GPL(__rcu_read_lock); | 243 | EXPORT_SYMBOL_GPL(__rcu_read_lock); |
244 | 244 | ||
245 | /* | 245 | /* |
246 | * Check for preempted RCU readers blocking the current grace period | 246 | * Check for preempted RCU readers blocking the current grace period |
247 | * for the specified rcu_node structure. If the caller needs a reliable | 247 | * for the specified rcu_node structure. If the caller needs a reliable |
248 | * answer, it must hold the rcu_node's ->lock. | 248 | * answer, it must hold the rcu_node's ->lock. |
249 | */ | 249 | */ |
250 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) | 250 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) |
251 | { | 251 | { |
252 | return rnp->gp_tasks != NULL; | 252 | return rnp->gp_tasks != NULL; |
253 | } | 253 | } |
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Record a quiescent state for all tasks that were previously queued | 256 | * Record a quiescent state for all tasks that were previously queued |
257 | * on the specified rcu_node structure and that were blocking the current | 257 | * on the specified rcu_node structure and that were blocking the current |
258 | * RCU grace period. The caller must hold the specified rnp->lock with | 258 | * RCU grace period. The caller must hold the specified rnp->lock with |
259 | * irqs disabled, and this lock is released upon return, but irqs remain | 259 | * irqs disabled, and this lock is released upon return, but irqs remain |
260 | * disabled. | 260 | * disabled. |
261 | */ | 261 | */ |
262 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | 262 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) |
263 | __releases(rnp->lock) | 263 | __releases(rnp->lock) |
264 | { | 264 | { |
265 | unsigned long mask; | 265 | unsigned long mask; |
266 | struct rcu_node *rnp_p; | 266 | struct rcu_node *rnp_p; |
267 | 267 | ||
268 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { | 268 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
269 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 269 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
270 | return; /* Still need more quiescent states! */ | 270 | return; /* Still need more quiescent states! */ |
271 | } | 271 | } |
272 | 272 | ||
273 | rnp_p = rnp->parent; | 273 | rnp_p = rnp->parent; |
274 | if (rnp_p == NULL) { | 274 | if (rnp_p == NULL) { |
275 | /* | 275 | /* |
276 | * Either there is only one rcu_node in the tree, | 276 | * Either there is only one rcu_node in the tree, |
277 | * or tasks were kicked up to root rcu_node due to | 277 | * or tasks were kicked up to root rcu_node due to |
278 | * CPUs going offline. | 278 | * CPUs going offline. |
279 | */ | 279 | */ |
280 | rcu_report_qs_rsp(&rcu_preempt_state, flags); | 280 | rcu_report_qs_rsp(&rcu_preempt_state, flags); |
281 | return; | 281 | return; |
282 | } | 282 | } |
283 | 283 | ||
284 | /* Report up the rest of the hierarchy. */ | 284 | /* Report up the rest of the hierarchy. */ |
285 | mask = rnp->grpmask; | 285 | mask = rnp->grpmask; |
286 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 286 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
287 | raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ | 287 | raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ |
288 | rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); | 288 | rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); |
289 | } | 289 | } |
290 | 290 | ||
291 | /* | 291 | /* |
292 | * Advance a ->blkd_tasks-list pointer to the next entry, instead | 292 | * Advance a ->blkd_tasks-list pointer to the next entry, instead |
293 | * returning NULL if at the end of the list. | 293 | * returning NULL if at the end of the list. |
294 | */ | 294 | */ |
295 | static struct list_head *rcu_next_node_entry(struct task_struct *t, | 295 | static struct list_head *rcu_next_node_entry(struct task_struct *t, |
296 | struct rcu_node *rnp) | 296 | struct rcu_node *rnp) |
297 | { | 297 | { |
298 | struct list_head *np; | 298 | struct list_head *np; |
299 | 299 | ||
300 | np = t->rcu_node_entry.next; | 300 | np = t->rcu_node_entry.next; |
301 | if (np == &rnp->blkd_tasks) | 301 | if (np == &rnp->blkd_tasks) |
302 | np = NULL; | 302 | np = NULL; |
303 | return np; | 303 | return np; |
304 | } | 304 | } |
305 | 305 | ||
306 | /* | 306 | /* |
307 | * Handle special cases during rcu_read_unlock(), such as needing to | 307 | * Handle special cases during rcu_read_unlock(), such as needing to |
308 | * notify RCU core processing or task having blocked during the RCU | 308 | * notify RCU core processing or task having blocked during the RCU |
309 | * read-side critical section. | 309 | * read-side critical section. |
310 | */ | 310 | */ |
311 | static noinline void rcu_read_unlock_special(struct task_struct *t) | 311 | static noinline void rcu_read_unlock_special(struct task_struct *t) |
312 | { | 312 | { |
313 | int empty; | 313 | int empty; |
314 | int empty_exp; | 314 | int empty_exp; |
315 | int empty_exp_now; | 315 | int empty_exp_now; |
316 | unsigned long flags; | 316 | unsigned long flags; |
317 | struct list_head *np; | 317 | struct list_head *np; |
318 | #ifdef CONFIG_RCU_BOOST | 318 | #ifdef CONFIG_RCU_BOOST |
319 | struct rt_mutex *rbmp = NULL; | 319 | struct rt_mutex *rbmp = NULL; |
320 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 320 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
321 | struct rcu_node *rnp; | 321 | struct rcu_node *rnp; |
322 | int special; | 322 | int special; |
323 | 323 | ||
324 | /* NMI handlers cannot block and cannot safely manipulate state. */ | 324 | /* NMI handlers cannot block and cannot safely manipulate state. */ |
325 | if (in_nmi()) | 325 | if (in_nmi()) |
326 | return; | 326 | return; |
327 | 327 | ||
328 | local_irq_save(flags); | 328 | local_irq_save(flags); |
329 | 329 | ||
330 | /* | 330 | /* |
331 | * If RCU core is waiting for this CPU to exit critical section, | 331 | * If RCU core is waiting for this CPU to exit critical section, |
332 | * let it know that we have done so. | 332 | * let it know that we have done so. |
333 | */ | 333 | */ |
334 | special = t->rcu_read_unlock_special; | 334 | special = t->rcu_read_unlock_special; |
335 | if (special & RCU_READ_UNLOCK_NEED_QS) { | 335 | if (special & RCU_READ_UNLOCK_NEED_QS) { |
336 | rcu_preempt_qs(smp_processor_id()); | 336 | rcu_preempt_qs(smp_processor_id()); |
337 | } | 337 | } |
338 | 338 | ||
339 | /* Hardware IRQ handlers cannot block. */ | 339 | /* Hardware IRQ handlers cannot block. */ |
340 | if (in_irq() || in_serving_softirq()) { | 340 | if (in_irq() || in_serving_softirq()) { |
341 | local_irq_restore(flags); | 341 | local_irq_restore(flags); |
342 | return; | 342 | return; |
343 | } | 343 | } |
344 | 344 | ||
345 | /* Clean up if blocked during RCU read-side critical section. */ | 345 | /* Clean up if blocked during RCU read-side critical section. */ |
346 | if (special & RCU_READ_UNLOCK_BLOCKED) { | 346 | if (special & RCU_READ_UNLOCK_BLOCKED) { |
347 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | 347 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; |
348 | 348 | ||
349 | /* | 349 | /* |
350 | * Remove this task from the list it blocked on. The | 350 | * Remove this task from the list it blocked on. The |
351 | * task can migrate while we acquire the lock, but at | 351 | * task can migrate while we acquire the lock, but at |
352 | * most one time. So at most two passes through loop. | 352 | * most one time. So at most two passes through loop. |
353 | */ | 353 | */ |
354 | for (;;) { | 354 | for (;;) { |
355 | rnp = t->rcu_blocked_node; | 355 | rnp = t->rcu_blocked_node; |
356 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 356 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
357 | if (rnp == t->rcu_blocked_node) | 357 | if (rnp == t->rcu_blocked_node) |
358 | break; | 358 | break; |
359 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 359 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
360 | } | 360 | } |
361 | empty = !rcu_preempt_blocked_readers_cgp(rnp); | 361 | empty = !rcu_preempt_blocked_readers_cgp(rnp); |
362 | empty_exp = !rcu_preempted_readers_exp(rnp); | 362 | empty_exp = !rcu_preempted_readers_exp(rnp); |
363 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 363 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
364 | np = rcu_next_node_entry(t, rnp); | 364 | np = rcu_next_node_entry(t, rnp); |
365 | list_del_init(&t->rcu_node_entry); | 365 | list_del_init(&t->rcu_node_entry); |
366 | t->rcu_blocked_node = NULL; | 366 | t->rcu_blocked_node = NULL; |
367 | trace_rcu_unlock_preempted_task("rcu_preempt", | 367 | trace_rcu_unlock_preempted_task("rcu_preempt", |
368 | rnp->gpnum, t->pid); | 368 | rnp->gpnum, t->pid); |
369 | if (&t->rcu_node_entry == rnp->gp_tasks) | 369 | if (&t->rcu_node_entry == rnp->gp_tasks) |
370 | rnp->gp_tasks = np; | 370 | rnp->gp_tasks = np; |
371 | if (&t->rcu_node_entry == rnp->exp_tasks) | 371 | if (&t->rcu_node_entry == rnp->exp_tasks) |
372 | rnp->exp_tasks = np; | 372 | rnp->exp_tasks = np; |
373 | #ifdef CONFIG_RCU_BOOST | 373 | #ifdef CONFIG_RCU_BOOST |
374 | if (&t->rcu_node_entry == rnp->boost_tasks) | 374 | if (&t->rcu_node_entry == rnp->boost_tasks) |
375 | rnp->boost_tasks = np; | 375 | rnp->boost_tasks = np; |
376 | /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ | 376 | /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ |
377 | if (t->rcu_boost_mutex) { | 377 | if (t->rcu_boost_mutex) { |
378 | rbmp = t->rcu_boost_mutex; | 378 | rbmp = t->rcu_boost_mutex; |
379 | t->rcu_boost_mutex = NULL; | 379 | t->rcu_boost_mutex = NULL; |
380 | } | 380 | } |
381 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 381 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
382 | 382 | ||
383 | /* | 383 | /* |
384 | * If this was the last task on the current list, and if | 384 | * If this was the last task on the current list, and if |
385 | * we aren't waiting on any CPUs, report the quiescent state. | 385 | * we aren't waiting on any CPUs, report the quiescent state. |
386 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, | 386 | * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, |
387 | * so we must take a snapshot of the expedited state. | 387 | * so we must take a snapshot of the expedited state. |
388 | */ | 388 | */ |
389 | empty_exp_now = !rcu_preempted_readers_exp(rnp); | 389 | empty_exp_now = !rcu_preempted_readers_exp(rnp); |
390 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { | 390 | if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { |
391 | trace_rcu_quiescent_state_report("preempt_rcu", | 391 | trace_rcu_quiescent_state_report("preempt_rcu", |
392 | rnp->gpnum, | 392 | rnp->gpnum, |
393 | 0, rnp->qsmask, | 393 | 0, rnp->qsmask, |
394 | rnp->level, | 394 | rnp->level, |
395 | rnp->grplo, | 395 | rnp->grplo, |
396 | rnp->grphi, | 396 | rnp->grphi, |
397 | !!rnp->gp_tasks); | 397 | !!rnp->gp_tasks); |
398 | rcu_report_unblock_qs_rnp(rnp, flags); | 398 | rcu_report_unblock_qs_rnp(rnp, flags); |
399 | } else | 399 | } else |
400 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 400 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
401 | 401 | ||
402 | #ifdef CONFIG_RCU_BOOST | 402 | #ifdef CONFIG_RCU_BOOST |
403 | /* Unboost if we were boosted. */ | 403 | /* Unboost if we were boosted. */ |
404 | if (rbmp) | 404 | if (rbmp) |
405 | rt_mutex_unlock(rbmp); | 405 | rt_mutex_unlock(rbmp); |
406 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 406 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
407 | 407 | ||
408 | /* | 408 | /* |
409 | * If this was the last task on the expedited lists, | 409 | * If this was the last task on the expedited lists, |
410 | * then we need to report up the rcu_node hierarchy. | 410 | * then we need to report up the rcu_node hierarchy. |
411 | */ | 411 | */ |
412 | if (!empty_exp && empty_exp_now) | 412 | if (!empty_exp && empty_exp_now) |
413 | rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); | 413 | rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); |
414 | } else { | 414 | } else { |
415 | local_irq_restore(flags); | 415 | local_irq_restore(flags); |
416 | } | 416 | } |
417 | } | 417 | } |
418 | 418 | ||
419 | /* | 419 | /* |
420 | * Tree-preemptible RCU implementation for rcu_read_unlock(). | 420 | * Tree-preemptible RCU implementation for rcu_read_unlock(). |
421 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | 421 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost |
422 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | 422 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then |
423 | * invoke rcu_read_unlock_special() to clean up after a context switch | 423 | * invoke rcu_read_unlock_special() to clean up after a context switch |
424 | * in an RCU read-side critical section and other special cases. | 424 | * in an RCU read-side critical section and other special cases. |
425 | */ | 425 | */ |
426 | void __rcu_read_unlock(void) | 426 | void __rcu_read_unlock(void) |
427 | { | 427 | { |
428 | struct task_struct *t = current; | 428 | struct task_struct *t = current; |
429 | 429 | ||
430 | if (t->rcu_read_lock_nesting != 1) | 430 | if (t->rcu_read_lock_nesting != 1) |
431 | --t->rcu_read_lock_nesting; | 431 | --t->rcu_read_lock_nesting; |
432 | else { | 432 | else { |
433 | barrier(); /* critical section before exit code. */ | 433 | barrier(); /* critical section before exit code. */ |
434 | t->rcu_read_lock_nesting = INT_MIN; | 434 | t->rcu_read_lock_nesting = INT_MIN; |
435 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 435 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
436 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 436 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) |
437 | rcu_read_unlock_special(t); | 437 | rcu_read_unlock_special(t); |
438 | barrier(); /* ->rcu_read_unlock_special load before assign */ | 438 | barrier(); /* ->rcu_read_unlock_special load before assign */ |
439 | t->rcu_read_lock_nesting = 0; | 439 | t->rcu_read_lock_nesting = 0; |
440 | } | 440 | } |
441 | #ifdef CONFIG_PROVE_LOCKING | 441 | #ifdef CONFIG_PROVE_LOCKING |
442 | { | 442 | { |
443 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); | 443 | int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); |
444 | 444 | ||
445 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); | 445 | WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); |
446 | } | 446 | } |
447 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | 447 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ |
448 | } | 448 | } |
449 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | 449 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); |
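The nesting/sentinel dance in __rcu_read_unlock() above is easier to see in isolation. Below is a minimal user-space analogue (hypothetical names; no interrupts, scheduler, or per-task state shown). The point of the INT_MIN sentinel is that any re-entrant unlock taken while the outermost unlock's slow path is running sees a nesting count other than 1 and takes the simple decrement path.

/*
 * Toy analogue of the nesting/sentinel logic above; illustrative only.
 */
#include <limits.h>
#include <stdio.h>

static int nesting;		/* stand-in for t->rcu_read_lock_nesting */
static int special;		/* stand-in for t->rcu_read_unlock_special */

static void slow_path(void)	/* stand-in for rcu_read_unlock_special() */
{
	special = 0;
}

static void toy_read_lock(void)
{
	nesting++;
}

static void toy_read_unlock(void)
{
	if (nesting != 1) {
		--nesting;		/* nested, or re-entry during slow path */
	} else {
		nesting = INT_MIN;	/* sentinel: outermost unlock in flight */
		if (special)
			slow_path();
		nesting = 0;		/* truly outside the critical section */
	}
}

int main(void)
{
	toy_read_lock();
	toy_read_lock();		/* nested read-side critical section */
	special = 1;
	toy_read_unlock();		/* inner unlock: just decrements */
	toy_read_unlock();		/* outermost unlock: sentinel + slow path */
	printf("nesting=%d special=%d\n", nesting, special);
	return 0;
}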
450 | 450 | ||
451 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE | 451 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE |
452 | 452 | ||
453 | /* | 453 | /* |
454 | * Dump detailed information for all tasks blocking the current RCU | 454 | * Dump detailed information for all tasks blocking the current RCU |
455 | * grace period on the specified rcu_node structure. | 455 | * grace period on the specified rcu_node structure. |
456 | */ | 456 | */ |
457 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) | 457 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
458 | { | 458 | { |
459 | unsigned long flags; | 459 | unsigned long flags; |
460 | struct task_struct *t; | 460 | struct task_struct *t; |
461 | 461 | ||
462 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 462 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
463 | return; | 463 | return; |
464 | raw_spin_lock_irqsave(&rnp->lock, flags); | 464 | raw_spin_lock_irqsave(&rnp->lock, flags); |
465 | t = list_entry(rnp->gp_tasks, | 465 | t = list_entry(rnp->gp_tasks, |
466 | struct task_struct, rcu_node_entry); | 466 | struct task_struct, rcu_node_entry); |
467 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) | 467 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
468 | sched_show_task(t); | 468 | sched_show_task(t); |
469 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 469 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
470 | } | 470 | } |
471 | 471 | ||
472 | /* | 472 | /* |
473 | * Dump detailed information for all tasks blocking the current RCU | 473 | * Dump detailed information for all tasks blocking the current RCU |
474 | * grace period. | 474 | * grace period. |
475 | */ | 475 | */ |
476 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) | 476 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) |
477 | { | 477 | { |
478 | struct rcu_node *rnp = rcu_get_root(rsp); | 478 | struct rcu_node *rnp = rcu_get_root(rsp); |
479 | 479 | ||
480 | rcu_print_detail_task_stall_rnp(rnp); | 480 | rcu_print_detail_task_stall_rnp(rnp); |
481 | rcu_for_each_leaf_node(rsp, rnp) | 481 | rcu_for_each_leaf_node(rsp, rnp) |
482 | rcu_print_detail_task_stall_rnp(rnp); | 482 | rcu_print_detail_task_stall_rnp(rnp); |
483 | } | 483 | } |
484 | 484 | ||
485 | #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | 485 | #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ |
486 | 486 | ||
487 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) | 487 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) |
488 | { | 488 | { |
489 | } | 489 | } |
490 | 490 | ||
491 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | 491 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ |
492 | 492 | ||
493 | /* | 493 | /* |
494 | * Scan the current list of tasks blocked within RCU read-side critical | 494 | * Scan the current list of tasks blocked within RCU read-side critical |
495 | * sections, printing out the tid of each. | 495 | * sections, printing out the tid of each. |
496 | */ | 496 | */ |
497 | static int rcu_print_task_stall(struct rcu_node *rnp) | 497 | static int rcu_print_task_stall(struct rcu_node *rnp) |
498 | { | 498 | { |
499 | struct task_struct *t; | 499 | struct task_struct *t; |
500 | int ndetected = 0; | 500 | int ndetected = 0; |
501 | 501 | ||
502 | if (!rcu_preempt_blocked_readers_cgp(rnp)) | 502 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
503 | return 0; | 503 | return 0; |
504 | t = list_entry(rnp->gp_tasks, | 504 | t = list_entry(rnp->gp_tasks, |
505 | struct task_struct, rcu_node_entry); | 505 | struct task_struct, rcu_node_entry); |
506 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { | 506 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { |
507 | printk(" P%d", t->pid); | 507 | printk(" P%d", t->pid); |
508 | ndetected++; | 508 | ndetected++; |
509 | } | 509 | } |
510 | return ndetected; | 510 | return ndetected; |
511 | } | 511 | } |
512 | 512 | ||
513 | /* | 513 | /* |
514 | * Suppress preemptible RCU's CPU stall warnings by pushing the | 514 | * Suppress preemptible RCU's CPU stall warnings by pushing the |
515 | * time of the next stall-warning message comfortably far into the | 515 | * time of the next stall-warning message comfortably far into the |
516 | * future. | 516 | * future. |
517 | */ | 517 | */ |
518 | static void rcu_preempt_stall_reset(void) | 518 | static void rcu_preempt_stall_reset(void) |
519 | { | 519 | { |
520 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 520 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; |
521 | } | 521 | } |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * Check that the list of blocked tasks for the newly completed grace | 524 | * Check that the list of blocked tasks for the newly completed grace |
525 | * period is in fact empty. It is a serious bug to complete a grace | 525 | * period is in fact empty. It is a serious bug to complete a grace |
526 | * period that still has RCU readers blocked! This function must be | 526 | * period that still has RCU readers blocked! This function must be |
527 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock | 527 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock |
528 | * must be held by the caller. | 528 | * must be held by the caller. |
529 | * | 529 | * |
530 | * Also, if there are blocked tasks on the list, they automatically | 530 | * Also, if there are blocked tasks on the list, they automatically |
531 | * block the newly created grace period, so set up ->gp_tasks accordingly. | 531 | * block the newly created grace period, so set up ->gp_tasks accordingly. |
532 | */ | 532 | */ |
533 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 533 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
534 | { | 534 | { |
535 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); | 535 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); |
536 | if (!list_empty(&rnp->blkd_tasks)) | 536 | if (!list_empty(&rnp->blkd_tasks)) |
537 | rnp->gp_tasks = rnp->blkd_tasks.next; | 537 | rnp->gp_tasks = rnp->blkd_tasks.next; |
538 | WARN_ON_ONCE(rnp->qsmask); | 538 | WARN_ON_ONCE(rnp->qsmask); |
539 | } | 539 | } |
540 | 540 | ||
541 | #ifdef CONFIG_HOTPLUG_CPU | 541 | #ifdef CONFIG_HOTPLUG_CPU |
542 | 542 | ||
543 | /* | 543 | /* |
544 | * Handle tasklist migration for the case in which all CPUs covered by the | 544 | * Handle tasklist migration for the case in which all CPUs covered by the |
545 | * specified rcu_node have gone offline. Move them up to the root | 545 | * specified rcu_node have gone offline. Move them up to the root |
546 | * rcu_node. The reason for not just moving them to the immediate | 546 | * rcu_node. The reason for not just moving them to the immediate |
547 | * parent is to remove the need for rcu_read_unlock_special() to | 547 | * parent is to remove the need for rcu_read_unlock_special() to |
548 | * make more than two attempts to acquire the target rcu_node's lock. | 548 | * make more than two attempts to acquire the target rcu_node's lock. |
549 | * Returns non-zero if there were tasks blocking the current RCU | 549 | * Returns non-zero if there were tasks blocking the current RCU |
550 | * grace period on the specified rcu_node structure. | 550 | * grace period on the specified rcu_node structure. |
551 | * | 551 | * |
552 | * The return value uses separate bits to indicate blocking of the | 552 | * The return value uses separate bits to indicate blocking of the |
553 | * normal and of the expedited grace period. | 553 | * normal and of the expedited grace period. |
554 | * | 554 | * |
555 | * The caller must hold rnp->lock with irqs disabled. | 555 | * The caller must hold rnp->lock with irqs disabled. |
556 | */ | 556 | */ |
557 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | 557 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
558 | struct rcu_node *rnp, | 558 | struct rcu_node *rnp, |
559 | struct rcu_data *rdp) | 559 | struct rcu_data *rdp) |
560 | { | 560 | { |
561 | struct list_head *lp; | 561 | struct list_head *lp; |
562 | struct list_head *lp_root; | 562 | struct list_head *lp_root; |
563 | int retval = 0; | 563 | int retval = 0; |
564 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 564 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
565 | struct task_struct *t; | 565 | struct task_struct *t; |
566 | 566 | ||
567 | if (rnp == rnp_root) { | 567 | if (rnp == rnp_root) { |
568 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | 568 | WARN_ONCE(1, "Last CPU thought to be offlined?"); |
569 | return 0; /* Shouldn't happen: at least one CPU online. */ | 569 | return 0; /* Shouldn't happen: at least one CPU online. */ |
570 | } | 570 | } |
571 | 571 | ||
572 | /* If we are on an internal node, complain bitterly. */ | 572 | /* If we are on an internal node, complain bitterly. */ |
573 | WARN_ON_ONCE(rnp != rdp->mynode); | 573 | WARN_ON_ONCE(rnp != rdp->mynode); |
574 | 574 | ||
575 | /* | 575 | /* |
576 | * Move tasks up to root rcu_node. Don't try to get fancy for | 576 | * Move tasks up to root rcu_node. Don't try to get fancy for |
577 | * this corner-case operation -- just put this node's tasks | 577 | * this corner-case operation -- just put this node's tasks |
578 | * at the head of the root node's list, and update the root node's | 578 | * at the head of the root node's list, and update the root node's |
579 | * ->gp_tasks and ->exp_tasks pointers to those of this node, | 579 | * ->gp_tasks and ->exp_tasks pointers to those of this node, |
580 | * if non-NULL. This might result in waiting for more tasks than | 580 | * if non-NULL. This might result in waiting for more tasks than |
581 | * absolutely necessary, but this is a good performance/complexity | 581 | * absolutely necessary, but this is a good performance/complexity |
582 | * tradeoff. | 582 | * tradeoff. |
583 | */ | 583 | */ |
584 | if (rcu_preempt_blocked_readers_cgp(rnp)) | 584 | if (rcu_preempt_blocked_readers_cgp(rnp)) |
585 | retval |= RCU_OFL_TASKS_NORM_GP; | 585 | retval |= RCU_OFL_TASKS_NORM_GP; |
586 | if (rcu_preempted_readers_exp(rnp)) | 586 | if (rcu_preempted_readers_exp(rnp)) |
587 | retval |= RCU_OFL_TASKS_EXP_GP; | 587 | retval |= RCU_OFL_TASKS_EXP_GP; |
588 | lp = &rnp->blkd_tasks; | 588 | lp = &rnp->blkd_tasks; |
589 | lp_root = &rnp_root->blkd_tasks; | 589 | lp_root = &rnp_root->blkd_tasks; |
590 | while (!list_empty(lp)) { | 590 | while (!list_empty(lp)) { |
591 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); | 591 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); |
592 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | 592 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ |
593 | list_del(&t->rcu_node_entry); | 593 | list_del(&t->rcu_node_entry); |
594 | t->rcu_blocked_node = rnp_root; | 594 | t->rcu_blocked_node = rnp_root; |
595 | list_add(&t->rcu_node_entry, lp_root); | 595 | list_add(&t->rcu_node_entry, lp_root); |
596 | if (&t->rcu_node_entry == rnp->gp_tasks) | 596 | if (&t->rcu_node_entry == rnp->gp_tasks) |
597 | rnp_root->gp_tasks = rnp->gp_tasks; | 597 | rnp_root->gp_tasks = rnp->gp_tasks; |
598 | if (&t->rcu_node_entry == rnp->exp_tasks) | 598 | if (&t->rcu_node_entry == rnp->exp_tasks) |
599 | rnp_root->exp_tasks = rnp->exp_tasks; | 599 | rnp_root->exp_tasks = rnp->exp_tasks; |
600 | #ifdef CONFIG_RCU_BOOST | 600 | #ifdef CONFIG_RCU_BOOST |
601 | if (&t->rcu_node_entry == rnp->boost_tasks) | 601 | if (&t->rcu_node_entry == rnp->boost_tasks) |
602 | rnp_root->boost_tasks = rnp->boost_tasks; | 602 | rnp_root->boost_tasks = rnp->boost_tasks; |
603 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 603 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
604 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | 604 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ |
605 | } | 605 | } |
606 | 606 | ||
607 | #ifdef CONFIG_RCU_BOOST | 607 | #ifdef CONFIG_RCU_BOOST |
608 | /* In case root is being boosted and leaf is not. */ | 608 | /* In case root is being boosted and leaf is not. */ |
609 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | 609 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ |
610 | if (rnp_root->boost_tasks != NULL && | 610 | if (rnp_root->boost_tasks != NULL && |
611 | rnp_root->boost_tasks != rnp_root->gp_tasks) | 611 | rnp_root->boost_tasks != rnp_root->gp_tasks) |
612 | rnp_root->boost_tasks = rnp_root->gp_tasks; | 612 | rnp_root->boost_tasks = rnp_root->gp_tasks; |
613 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | 613 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ |
614 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 614 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
615 | 615 | ||
616 | rnp->gp_tasks = NULL; | 616 | rnp->gp_tasks = NULL; |
617 | rnp->exp_tasks = NULL; | 617 | rnp->exp_tasks = NULL; |
618 | return retval; | 618 | return retval; |
619 | } | 619 | } |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * Do CPU-offline processing for preemptible RCU. | 622 | * Do CPU-offline processing for preemptible RCU. |
623 | */ | 623 | */ |
624 | static void rcu_preempt_offline_cpu(int cpu) | 624 | static void rcu_preempt_offline_cpu(int cpu) |
625 | { | 625 | { |
626 | __rcu_offline_cpu(cpu, &rcu_preempt_state); | 626 | __rcu_offline_cpu(cpu, &rcu_preempt_state); |
627 | } | 627 | } |
628 | 628 | ||
629 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 629 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
630 | 630 | ||
631 | /* | 631 | /* |
632 | * Check for a quiescent state from the current CPU. When a task blocks, | 632 | * Check for a quiescent state from the current CPU. When a task blocks, |
633 | * the task is recorded in the corresponding CPU's rcu_node structure, | 633 | * the task is recorded in the corresponding CPU's rcu_node structure, |
634 | * which is checked elsewhere. | 634 | * which is checked elsewhere. |
635 | * | 635 | * |
636 | * Caller must disable hard irqs. | 636 | * Caller must disable hard irqs. |
637 | */ | 637 | */ |
638 | static void rcu_preempt_check_callbacks(int cpu) | 638 | static void rcu_preempt_check_callbacks(int cpu) |
639 | { | 639 | { |
640 | struct task_struct *t = current; | 640 | struct task_struct *t = current; |
641 | 641 | ||
642 | if (t->rcu_read_lock_nesting == 0) { | 642 | if (t->rcu_read_lock_nesting == 0) { |
643 | rcu_preempt_qs(cpu); | 643 | rcu_preempt_qs(cpu); |
644 | return; | 644 | return; |
645 | } | 645 | } |
646 | if (t->rcu_read_lock_nesting > 0 && | 646 | if (t->rcu_read_lock_nesting > 0 && |
647 | per_cpu(rcu_preempt_data, cpu).qs_pending) | 647 | per_cpu(rcu_preempt_data, cpu).qs_pending) |
648 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; | 648 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; |
649 | } | 649 | } |
650 | 650 | ||
651 | /* | 651 | /* |
652 | * Process callbacks for preemptible RCU. | 652 | * Process callbacks for preemptible RCU. |
653 | */ | 653 | */ |
654 | static void rcu_preempt_process_callbacks(void) | 654 | static void rcu_preempt_process_callbacks(void) |
655 | { | 655 | { |
656 | __rcu_process_callbacks(&rcu_preempt_state, | 656 | __rcu_process_callbacks(&rcu_preempt_state, |
657 | &__get_cpu_var(rcu_preempt_data)); | 657 | &__get_cpu_var(rcu_preempt_data)); |
658 | } | 658 | } |
659 | 659 | ||
660 | #ifdef CONFIG_RCU_BOOST | 660 | #ifdef CONFIG_RCU_BOOST |
661 | 661 | ||
662 | static void rcu_preempt_do_callbacks(void) | 662 | static void rcu_preempt_do_callbacks(void) |
663 | { | 663 | { |
664 | rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); | 664 | rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); |
665 | } | 665 | } |
666 | 666 | ||
667 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 667 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
668 | 668 | ||
669 | /* | 669 | /* |
670 | * Queue a preemptible-RCU callback for invocation after a grace period. | 670 | * Queue a preemptible-RCU callback for invocation after a grace period. |
671 | */ | 671 | */ |
672 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 672 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
673 | { | 673 | { |
674 | __call_rcu(head, func, &rcu_preempt_state); | 674 | __call_rcu(head, func, &rcu_preempt_state); |
675 | } | 675 | } |
676 | EXPORT_SYMBOL_GPL(call_rcu); | 676 | EXPORT_SYMBOL_GPL(call_rcu); |
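For context, the usual caller-side pattern for call_rcu() is to embed an rcu_head in the protected structure and free the structure from the callback after the grace period. A short sketch (struct foo and its helpers are illustrative names, not part of this file):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

/* Runs after a grace period; safe to free the element here. */
static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *fp = container_of(head, struct foo, rcu);

	kfree(fp);
}

static void foo_release(struct foo *fp)
{
	/* Caller must already have unpublished fp, e.g. via list_del_rcu(). */
	call_rcu(&fp->rcu, foo_free_rcu);
}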
677 | 677 | ||
678 | /** | 678 | /** |
679 | * synchronize_rcu - wait until a grace period has elapsed. | 679 | * synchronize_rcu - wait until a grace period has elapsed. |
680 | * | 680 | * |
681 | * Control will return to the caller some time after a full grace | 681 | * Control will return to the caller some time after a full grace |
682 | * period has elapsed, in other words after all currently executing RCU | 682 | * period has elapsed, in other words after all currently executing RCU |
683 | * read-side critical sections have completed. Note, however, that | 683 | * read-side critical sections have completed. Note, however, that |
684 | * upon return from synchronize_rcu(), the caller might well be executing | 684 | * upon return from synchronize_rcu(), the caller might well be executing |
685 | * concurrently with new RCU read-side critical sections that began while | 685 | * concurrently with new RCU read-side critical sections that began while |
686 | * synchronize_rcu() was waiting. RCU read-side critical sections are | 686 | * synchronize_rcu() was waiting. RCU read-side critical sections are |
687 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. | 687 | * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. |
688 | */ | 688 | */ |
689 | void synchronize_rcu(void) | 689 | void synchronize_rcu(void) |
690 | { | 690 | { |
691 | if (!rcu_scheduler_active) | 691 | if (!rcu_scheduler_active) |
692 | return; | 692 | return; |
693 | wait_rcu_gp(call_rcu); | 693 | wait_rcu_gp(call_rcu); |
694 | } | 694 | } |
695 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 695 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
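The classic updater-side use of synchronize_rcu() is the publish, wait, then free sequence. A sketch under assumed names (gbl_cfg, cfg_lock, and cfg_update() are illustrative, not part of this file):

#include <linux/errno.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfg {
	int a;
};

static struct cfg __rcu *gbl_cfg;
static DEFINE_SPINLOCK(cfg_lock);

static int cfg_update(int new_a)
{
	struct cfg *new_cp, *old_cp;

	new_cp = kmalloc(sizeof(*new_cp), GFP_KERNEL);
	if (!new_cp)
		return -ENOMEM;
	new_cp->a = new_a;

	spin_lock(&cfg_lock);
	old_cp = rcu_dereference_protected(gbl_cfg,
					   lockdep_is_held(&cfg_lock));
	rcu_assign_pointer(gbl_cfg, new_cp);	/* publish the new version */
	spin_unlock(&cfg_lock);

	synchronize_rcu();	/* wait out readers of the old version */
	kfree(old_cp);
	return 0;
}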
696 | 696 | ||
697 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | 697 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); |
698 | static long sync_rcu_preempt_exp_count; | 698 | static long sync_rcu_preempt_exp_count; |
699 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | 699 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); |
700 | 700 | ||
701 | /* | 701 | /* |
702 | * Return non-zero if there are any tasks in RCU read-side critical | 702 | * Return non-zero if there are any tasks in RCU read-side critical |
703 | * sections blocking the current preemptible-RCU expedited grace period. | 703 | * sections blocking the current preemptible-RCU expedited grace period. |
704 | * If there is no preemptible-RCU expedited grace period currently in | 704 | * If there is no preemptible-RCU expedited grace period currently in |
705 | * progress, returns zero unconditionally. | 705 | * progress, returns zero unconditionally. |
706 | */ | 706 | */ |
707 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | 707 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) |
708 | { | 708 | { |
709 | return rnp->exp_tasks != NULL; | 709 | return rnp->exp_tasks != NULL; |
710 | } | 710 | } |
711 | 711 | ||
712 | /* | 712 | /* |
713 | * Return non-zero if there is no RCU expedited grace period in progress | 713 | * Return non-zero if there is no RCU expedited grace period in progress |
714 | * for the specified rcu_node structure, in other words, if all CPUs and | 714 | * for the specified rcu_node structure, in other words, if all CPUs and |
715 | * tasks covered by the specified rcu_node structure have done their bit | 715 | * tasks covered by the specified rcu_node structure have done their bit |
716 | * for the current expedited grace period. Works only for preemptible | 716 | * for the current expedited grace period. Works only for preemptible |
717 | * RCU -- other RCU implementations use other means. | 717 | * RCU -- other RCU implementations use other means. |
718 | * | 718 | * |
719 | * Caller must hold sync_rcu_preempt_exp_mutex. | 719 | * Caller must hold sync_rcu_preempt_exp_mutex. |
720 | */ | 720 | */ |
721 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | 721 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) |
722 | { | 722 | { |
723 | return !rcu_preempted_readers_exp(rnp) && | 723 | return !rcu_preempted_readers_exp(rnp) && |
724 | ACCESS_ONCE(rnp->expmask) == 0; | 724 | ACCESS_ONCE(rnp->expmask) == 0; |
725 | } | 725 | } |
726 | 726 | ||
727 | /* | 727 | /* |
728 | * Report the exit from an RCU read-side critical section for the last task | 728 | * Report the exit from an RCU read-side critical section for the last task |
729 | * that queued itself during or before the current expedited preemptible-RCU | 729 | * that queued itself during or before the current expedited preemptible-RCU |
730 | * grace period. This event is reported either to the rcu_node structure on | 730 | * grace period. This event is reported either to the rcu_node structure on |
731 | * which the task was queued or to one of that rcu_node structure's ancestors, | 731 | * which the task was queued or to one of that rcu_node structure's ancestors, |
732 | * recursively up the tree. (Calm down, calm down, we do the recursion | 732 | * recursively up the tree. (Calm down, calm down, we do the recursion |
733 | * iteratively!) | 733 | * iteratively!) |
734 | * | 734 | * |
735 | * Most callers will set the "wake" flag, but the task initiating the | 735 | * Most callers will set the "wake" flag, but the task initiating the |
736 | * expedited grace period need not wake itself. | 736 | * expedited grace period need not wake itself. |
737 | * | 737 | * |
738 | * Caller must hold sync_rcu_preempt_exp_mutex. | 738 | * Caller must hold sync_rcu_preempt_exp_mutex. |
739 | */ | 739 | */ |
740 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 740 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
741 | bool wake) | 741 | bool wake) |
742 | { | 742 | { |
743 | unsigned long flags; | 743 | unsigned long flags; |
744 | unsigned long mask; | 744 | unsigned long mask; |
745 | 745 | ||
746 | raw_spin_lock_irqsave(&rnp->lock, flags); | 746 | raw_spin_lock_irqsave(&rnp->lock, flags); |
747 | for (;;) { | 747 | for (;;) { |
748 | if (!sync_rcu_preempt_exp_done(rnp)) { | 748 | if (!sync_rcu_preempt_exp_done(rnp)) { |
749 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 749 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
750 | break; | 750 | break; |
751 | } | 751 | } |
752 | if (rnp->parent == NULL) { | 752 | if (rnp->parent == NULL) { |
753 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 753 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
754 | if (wake) | 754 | if (wake) |
755 | wake_up(&sync_rcu_preempt_exp_wq); | 755 | wake_up(&sync_rcu_preempt_exp_wq); |
756 | break; | 756 | break; |
757 | } | 757 | } |
758 | mask = rnp->grpmask; | 758 | mask = rnp->grpmask; |
759 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 759 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
760 | rnp = rnp->parent; | 760 | rnp = rnp->parent; |
761 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 761 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
762 | rnp->expmask &= ~mask; | 762 | rnp->expmask &= ~mask; |
763 | } | 763 | } |
764 | } | 764 | } |
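The loop above walks the rcu_node tree iteratively rather than recursively: it clears this subtree's bit in the parent, stops as soon as some level still has outstanding reports, and wakes the waiter only once the root goes clean. Stripped of locking and RCU specifics, the shape of the walk is roughly as follows (toy types, illustrative only):

#include <stddef.h>

struct toynode {
	struct toynode *parent;
	unsigned long grpmask;	/* this node's bit in parent->pending */
	unsigned long pending;	/* children that have not yet reported */
};

static void toy_report_up(struct toynode *np)
{
	unsigned long mask;

	for (;;) {
		if (np->pending != 0)
			return;			/* this level still waiting */
		if (np->parent == NULL)
			return;			/* root clean: wake waiters here */
		mask = np->grpmask;
		np = np->parent;
		np->pending &= ~mask;		/* our whole subtree is done */
	}
}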
765 | 765 | ||
766 | /* | 766 | /* |
767 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | 767 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited |
768 | * grace period for the specified rcu_node structure. If there are no such | 768 | * grace period for the specified rcu_node structure. If there are no such |
769 | * tasks, report it up the rcu_node hierarchy. | 769 | * tasks, report it up the rcu_node hierarchy. |
770 | * | 770 | * |
771 | * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. | 771 | * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. |
772 | */ | 772 | */ |
773 | static void | 773 | static void |
774 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | 774 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) |
775 | { | 775 | { |
776 | unsigned long flags; | 776 | unsigned long flags; |
777 | int must_wait = 0; | 777 | int must_wait = 0; |
778 | 778 | ||
779 | raw_spin_lock_irqsave(&rnp->lock, flags); | 779 | raw_spin_lock_irqsave(&rnp->lock, flags); |
780 | if (list_empty(&rnp->blkd_tasks)) | 780 | if (list_empty(&rnp->blkd_tasks)) |
781 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 781 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
782 | else { | 782 | else { |
783 | rnp->exp_tasks = rnp->blkd_tasks.next; | 783 | rnp->exp_tasks = rnp->blkd_tasks.next; |
784 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | 784 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ |
785 | must_wait = 1; | 785 | must_wait = 1; |
786 | } | 786 | } |
787 | if (!must_wait) | 787 | if (!must_wait) |
788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ | 788 | rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ |
789 | } | 789 | } |
790 | 790 | ||
791 | /* | 791 | /* |
792 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 792 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
793 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 793 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
794 | * the ->blkd_tasks lists and wait for those lists to drain. | 794 | * the ->blkd_tasks lists and wait for those lists to drain. |
795 | */ | 795 | */ |
796 | void synchronize_rcu_expedited(void) | 796 | void synchronize_rcu_expedited(void) |
797 | { | 797 | { |
798 | unsigned long flags; | 798 | unsigned long flags; |
799 | struct rcu_node *rnp; | 799 | struct rcu_node *rnp; |
800 | struct rcu_state *rsp = &rcu_preempt_state; | 800 | struct rcu_state *rsp = &rcu_preempt_state; |
801 | long snap; | 801 | long snap; |
802 | int trycount = 0; | 802 | int trycount = 0; |
803 | 803 | ||
804 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | 804 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ |
805 | snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; | 805 | snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; |
806 | smp_mb(); /* Above access cannot bleed into critical section. */ | 806 | smp_mb(); /* Above access cannot bleed into critical section. */ |
807 | 807 | ||
808 | /* | 808 | /* |
809 | * Acquire the lock, falling back to synchronize_rcu() after too many | 809 | * Acquire the lock, falling back to synchronize_rcu() after too many |
810 | * lock-acquisition failures. Of course, if someone does the | 810 | * lock-acquisition failures. Of course, if someone does the |
811 | * expedited grace period for us, just leave. | 811 | * expedited grace period for us, just leave. |
812 | */ | 812 | */ |
813 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | 813 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { |
814 | if (trycount++ < 10) | 814 | if (trycount++ < 10) |
815 | udelay(trycount * num_online_cpus()); | 815 | udelay(trycount * num_online_cpus()); |
816 | else { | 816 | else { |
817 | synchronize_rcu(); | 817 | synchronize_rcu(); |
818 | return; | 818 | return; |
819 | } | 819 | } |
820 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | 820 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) |
821 | goto mb_ret; /* Others did our work for us. */ | 821 | goto mb_ret; /* Others did our work for us. */ |
822 | } | 822 | } |
823 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | 823 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) |
824 | goto unlock_mb_ret; /* Others did our work for us. */ | 824 | goto unlock_mb_ret; /* Others did our work for us. */ |
825 | 825 | ||
826 | /* Force all RCU readers onto ->blkd_tasks lists. */ | 826 | /* Force all RCU readers onto ->blkd_tasks lists. */ |
827 | synchronize_sched_expedited(); | 827 | synchronize_sched_expedited(); |
828 | 828 | ||
829 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 829 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
830 | 830 | ||
831 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ | 831 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ |
832 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | 832 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { |
833 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | 833 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ |
834 | rnp->expmask = rnp->qsmaskinit; | 834 | rnp->expmask = rnp->qsmaskinit; |
835 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 835 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
836 | } | 836 | } |
837 | 837 | ||
838 | /* Snapshot current state of ->blkd_tasks lists. */ | 838 | /* Snapshot current state of ->blkd_tasks lists. */ |
839 | rcu_for_each_leaf_node(rsp, rnp) | 839 | rcu_for_each_leaf_node(rsp, rnp) |
840 | sync_rcu_preempt_exp_init(rsp, rnp); | 840 | sync_rcu_preempt_exp_init(rsp, rnp); |
841 | if (NUM_RCU_NODES > 1) | 841 | if (NUM_RCU_NODES > 1) |
842 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | 842 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); |
843 | 843 | ||
844 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 844 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
845 | 845 | ||
846 | /* Wait for snapshotted ->blkd_tasks lists to drain. */ | 846 | /* Wait for snapshotted ->blkd_tasks lists to drain. */ |
847 | rnp = rcu_get_root(rsp); | 847 | rnp = rcu_get_root(rsp); |
848 | wait_event(sync_rcu_preempt_exp_wq, | 848 | wait_event(sync_rcu_preempt_exp_wq, |
849 | sync_rcu_preempt_exp_done(rnp)); | 849 | sync_rcu_preempt_exp_done(rnp)); |
850 | 850 | ||
851 | /* Clean up and exit. */ | 851 | /* Clean up and exit. */ |
852 | smp_mb(); /* ensure expedited GP seen before counter increment. */ | 852 | smp_mb(); /* ensure expedited GP seen before counter increment. */ |
853 | ACCESS_ONCE(sync_rcu_preempt_exp_count)++; | 853 | ACCESS_ONCE(sync_rcu_preempt_exp_count)++; |
854 | unlock_mb_ret: | 854 | unlock_mb_ret: |
855 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | 855 | mutex_unlock(&sync_rcu_preempt_exp_mutex); |
856 | mb_ret: | 856 | mb_ret: |
857 | smp_mb(); /* ensure subsequent action seen after grace period. */ | 857 | smp_mb(); /* ensure subsequent action seen after grace period. */ |
858 | } | 858 | } |
859 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 859 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
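The snap/sync_rcu_preempt_exp_count handshake above lets late arrivals piggyback on an expedited pass that someone else completes while they are contending for the mutex; as I read it, the +1 plus the strict comparison conservatively requires two counter advances, which guarantees at least one full pass began after the snapshot. The same counter-snapshot pattern reduces to a user-space sketch with C11 atomics (illustrative names; the udelay() backoff and synchronize_rcu() fallback are omitted):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t exp_mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_long exp_count;	/* bumped after each completed pass */

static void do_expensive_pass(void)
{
	/* stand-in for synchronize_sched_expedited() plus the tree scan */
}

static void expedited(void)
{
	long snap = atomic_load(&exp_count) + 1;

	while (pthread_mutex_trylock(&exp_mutex) != 0) {
		if (atomic_load(&exp_count) - snap > 0)
			return;		/* others did our work for us */
	}
	if (atomic_load(&exp_count) - snap > 0) {
		pthread_mutex_unlock(&exp_mutex);
		return;			/* finished while we were acquiring */
	}
	do_expensive_pass();
	atomic_fetch_add(&exp_count, 1);
	pthread_mutex_unlock(&exp_mutex);
}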
860 | 860 | ||
861 | /* | 861 | /* |
862 | * Check to see if there is any immediate preemptible-RCU-related work | 862 | * Check to see if there is any immediate preemptible-RCU-related work |
863 | * to be done. | 863 | * to be done. |
864 | */ | 864 | */ |
865 | static int rcu_preempt_pending(int cpu) | 865 | static int rcu_preempt_pending(int cpu) |
866 | { | 866 | { |
867 | return __rcu_pending(&rcu_preempt_state, | 867 | return __rcu_pending(&rcu_preempt_state, |
868 | &per_cpu(rcu_preempt_data, cpu)); | 868 | &per_cpu(rcu_preempt_data, cpu)); |
869 | } | 869 | } |
870 | 870 | ||
871 | /* | 871 | /* |
872 | * Does preemptible RCU need the CPU to stay out of dynticks mode? | 872 | * Does preemptible RCU need the CPU to stay out of dynticks mode? |
873 | */ | 873 | */ |
874 | static int rcu_preempt_needs_cpu(int cpu) | 874 | static int rcu_preempt_needs_cpu(int cpu) |
875 | { | 875 | { |
876 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; | 876 | return !!per_cpu(rcu_preempt_data, cpu).nxtlist; |
877 | } | 877 | } |
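rcu_preempt_needs_cpu() above only reports whether callbacks are queued; the dyntick-idle policy described in the commit message is applied elsewhere, in the RCU_FAST_NO_HZ path. As a rough sketch of that policy only, and emphatically not the code this commit adds (hypothetical names throughout; the timer is assumed to have been set up elsewhere): sleep freely with no callbacks, stay awake while the current grace period still needs this CPU, and otherwise sleep but arm a several-jiffy wakeup so an all-idle system still completes the grace period.

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/types.h>

#define TOY_IDLE_GP_DELAY	6	/* jiffies, the value the commit message cites */

static struct timer_list toy_idle_gp_timer;	/* per-CPU and initialized in real code */

/* Hypothetical policy sketch: may this CPU enter dyntick-idle now? */
static bool toy_cpu_may_sleep(bool have_callbacks, bool gp_needs_us)
{
	if (!have_callbacks)
		return true;		/* nothing queued: sleep indefinitely */
	if (gp_needs_us)
		return false;		/* current GP still needs this CPU */
	/*
	 * Callbacks are queued, but the grace period has all it needs from
	 * this CPU: sleep anyway, and wake in a few jiffies in case every
	 * CPU goes idle and the grace period would otherwise never end.
	 */
	mod_timer(&toy_idle_gp_timer, jiffies + TOY_IDLE_GP_DELAY);
	return true;
}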
878 | 878 | ||
879 | /** | 879 | /** |
880 | * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. | 880 | * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. |
881 | */ | 881 | */ |
882 | void rcu_barrier(void) | 882 | void rcu_barrier(void) |
883 | { | 883 | { |
884 | _rcu_barrier(&rcu_preempt_state, call_rcu); | 884 | _rcu_barrier(&rcu_preempt_state, call_rcu); |
885 | } | 885 | } |
886 | EXPORT_SYMBOL_GPL(rcu_barrier); | 886 | EXPORT_SYMBOL_GPL(rcu_barrier); |
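A common reason to call rcu_barrier() is module unload: a module that has posted call_rcu() callbacks must wait for all of them to run before its callback functions and data disappear. A sketch (foo_exit() and foo_stop_queueing() are illustrative names):

#include <linux/module.h>
#include <linux/rcupdate.h>

static void foo_stop_queueing(void)
{
	/* Hypothetical: unpublish data so no further call_rcu() is posted. */
}

static void __exit foo_exit(void)
{
	foo_stop_queueing();
	rcu_barrier();	/* all in-flight callbacks finish before unload */
}
module_exit(foo_exit);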
887 | 887 | ||
888 | /* | 888 | /* |
889 | * Initialize preemptible RCU's per-CPU data. | 889 | * Initialize preemptible RCU's per-CPU data. |
890 | */ | 890 | */ |
891 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | 891 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) |
892 | { | 892 | { |
893 | rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); | 893 | rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); |
894 | } | 894 | } |
895 | 895 | ||
896 | /* | 896 | /* |
897 | * Move preemptible RCU's callbacks from dying CPU to other online CPU. | 897 | * Move preemptible RCU's callbacks from dying CPU to other online CPU. |
898 | */ | 898 | */ |
899 | static void rcu_preempt_send_cbs_to_online(void) | 899 | static void rcu_preempt_send_cbs_to_online(void) |
900 | { | 900 | { |
901 | rcu_send_cbs_to_online(&rcu_preempt_state); | 901 | rcu_send_cbs_to_online(&rcu_preempt_state); |
902 | } | 902 | } |
903 | 903 | ||
904 | /* | 904 | /* |
905 | * Initialize preemptible RCU's state structures. | 905 | * Initialize preemptible RCU's state structures. |
906 | */ | 906 | */ |
907 | static void __init __rcu_init_preempt(void) | 907 | static void __init __rcu_init_preempt(void) |
908 | { | 908 | { |
909 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); | 909 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
910 | } | 910 | } |
911 | 911 | ||
912 | /* | 912 | /* |
913 | * Check for a task exiting while in a preemptible-RCU read-side | 913 | * Check for a task exiting while in a preemptible-RCU read-side |
914 | * critical section, clean up if so. No need to issue warnings, | 914 | * critical section, clean up if so. No need to issue warnings, |
915 | * as debug_check_no_locks_held() already does this if lockdep | 915 | * as debug_check_no_locks_held() already does this if lockdep |
916 | * is enabled. | 916 | * is enabled. |
917 | */ | 917 | */ |
918 | void exit_rcu(void) | 918 | void exit_rcu(void) |
919 | { | 919 | { |
920 | struct task_struct *t = current; | 920 | struct task_struct *t = current; |
921 | 921 | ||
922 | if (t->rcu_read_lock_nesting == 0) | 922 | if (t->rcu_read_lock_nesting == 0) |
923 | return; | 923 | return; |
924 | t->rcu_read_lock_nesting = 1; | 924 | t->rcu_read_lock_nesting = 1; |
925 | __rcu_read_unlock(); | 925 | __rcu_read_unlock(); |
926 | } | 926 | } |
927 | 927 | ||
928 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 928 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
929 | 929 | ||
930 | static struct rcu_state *rcu_state = &rcu_sched_state; | 930 | static struct rcu_state *rcu_state = &rcu_sched_state; |
931 | 931 | ||
932 | /* | 932 | /* |
933 | * Tell them what RCU they are running. | 933 | * Tell them what RCU they are running. |
934 | */ | 934 | */ |
935 | static void __init rcu_bootup_announce(void) | 935 | static void __init rcu_bootup_announce(void) |
936 | { | 936 | { |
937 | printk(KERN_INFO "Hierarchical RCU implementation.\n"); | 937 | printk(KERN_INFO "Hierarchical RCU implementation.\n"); |
938 | rcu_bootup_announce_oddness(); | 938 | rcu_bootup_announce_oddness(); |
939 | } | 939 | } |
940 | 940 | ||
941 | /* | 941 | /* |
942 | * Return the number of RCU batches processed thus far for debug & stats. | 942 | * Return the number of RCU batches processed thus far for debug & stats. |
943 | */ | 943 | */ |
944 | long rcu_batches_completed(void) | 944 | long rcu_batches_completed(void) |
945 | { | 945 | { |
946 | return rcu_batches_completed_sched(); | 946 | return rcu_batches_completed_sched(); |
947 | } | 947 | } |
948 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 948 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
949 | 949 | ||
950 | /* | 950 | /* |
951 | * Force a quiescent state for RCU, which, because there is no preemptible | 951 | * Force a quiescent state for RCU, which, because there is no preemptible |
952 | * RCU, becomes the same as rcu-sched. | 952 | * RCU, becomes the same as rcu-sched. |
953 | */ | 953 | */ |
954 | void rcu_force_quiescent_state(void) | 954 | void rcu_force_quiescent_state(void) |
955 | { | 955 | { |
956 | rcu_sched_force_quiescent_state(); | 956 | rcu_sched_force_quiescent_state(); |
957 | } | 957 | } |
958 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 958 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
959 | 959 | ||
960 | /* | 960 | /* |
961 | * Because preemptible RCU does not exist, we never have to check for | 961 | * Because preemptible RCU does not exist, we never have to check for |
962 | * CPUs being in quiescent states. | 962 | * CPUs being in quiescent states. |
963 | */ | 963 | */ |
964 | static void rcu_preempt_note_context_switch(int cpu) | 964 | static void rcu_preempt_note_context_switch(int cpu) |
965 | { | 965 | { |
966 | } | 966 | } |
967 | 967 | ||
968 | /* | 968 | /* |
969 | * Because preemptible RCU does not exist, there are never any preempted | 969 | * Because preemptible RCU does not exist, there are never any preempted |
970 | * RCU readers. | 970 | * RCU readers. |
971 | */ | 971 | */ |
972 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) | 972 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) |
973 | { | 973 | { |
974 | return 0; | 974 | return 0; |
975 | } | 975 | } |
976 | 976 | ||
977 | #ifdef CONFIG_HOTPLUG_CPU | 977 | #ifdef CONFIG_HOTPLUG_CPU |
978 | 978 | ||
979 | /* Because preemptible RCU does not exist, no quieting of tasks. */ | 979 | /* Because preemptible RCU does not exist, no quieting of tasks. */ |
980 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | 980 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) |
981 | { | 981 | { |
982 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 982 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
983 | } | 983 | } |
984 | 984 | ||
985 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 985 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
986 | 986 | ||
987 | /* | 987 | /* |
988 | * Because preemptible RCU does not exist, we never have to check for | 988 | * Because preemptible RCU does not exist, we never have to check for |
989 | * tasks blocked within RCU read-side critical sections. | 989 | * tasks blocked within RCU read-side critical sections. |
990 | */ | 990 | */ |
991 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) | 991 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) |
992 | { | 992 | { |
993 | } | 993 | } |
994 | 994 | ||
995 | /* | 995 | /* |
996 | * Because preemptible RCU does not exist, we never have to check for | 996 | * Because preemptible RCU does not exist, we never have to check for |
997 | * tasks blocked within RCU read-side critical sections. | 997 | * tasks blocked within RCU read-side critical sections. |
998 | */ | 998 | */ |
999 | static int rcu_print_task_stall(struct rcu_node *rnp) | 999 | static int rcu_print_task_stall(struct rcu_node *rnp) |
1000 | { | 1000 | { |
1001 | return 0; | 1001 | return 0; |
1002 | } | 1002 | } |
1003 | 1003 | ||
1004 | /* | 1004 | /* |
1005 | * Because preemptible RCU does not exist, there is no need to suppress | 1005 | * Because preemptible RCU does not exist, there is no need to suppress |
1006 | * its CPU stall warnings. | 1006 | * its CPU stall warnings. |
1007 | */ | 1007 | */ |
1008 | static void rcu_preempt_stall_reset(void) | 1008 | static void rcu_preempt_stall_reset(void) |
1009 | { | 1009 | { |
1010 | } | 1010 | } |
1011 | 1011 | ||
1012 | /* | 1012 | /* |
1013 | * Because there is no preemptible RCU, there can be no readers blocked, | 1013 | * Because there is no preemptible RCU, there can be no readers blocked, |
1014 | * so there is no need to check for blocked tasks. So check only for | 1014 | * so there is no need to check for blocked tasks. So check only for |
1015 | * bogus qsmask values. | 1015 | * bogus qsmask values. |
1016 | */ | 1016 | */ |
1017 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 1017 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
1018 | { | 1018 | { |
1019 | WARN_ON_ONCE(rnp->qsmask); | 1019 | WARN_ON_ONCE(rnp->qsmask); |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | #ifdef CONFIG_HOTPLUG_CPU | 1022 | #ifdef CONFIG_HOTPLUG_CPU |
1023 | 1023 | ||
1024 | /* | 1024 | /* |
1025 | * Because preemptible RCU does not exist, it never needs to migrate | 1025 | * Because preemptible RCU does not exist, it never needs to migrate |
1026 | * tasks that were blocked within RCU read-side critical sections, and | 1026 | * tasks that were blocked within RCU read-side critical sections, and |
1027 | * such non-existent tasks cannot possibly have been blocking the current | 1027 | * such non-existent tasks cannot possibly have been blocking the current |
1028 | * grace period. | 1028 | * grace period. |
1029 | */ | 1029 | */ |
1030 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | 1030 | static int rcu_preempt_offline_tasks(struct rcu_state *rsp, |
1031 | struct rcu_node *rnp, | 1031 | struct rcu_node *rnp, |
1032 | struct rcu_data *rdp) | 1032 | struct rcu_data *rdp) |
1033 | { | 1033 | { |
1034 | return 0; | 1034 | return 0; |
1035 | } | 1035 | } |
1036 | 1036 | ||
1037 | /* | 1037 | /* |
1038 | * Because preemptible RCU does not exist, it never needs CPU-offline | 1038 | * Because preemptible RCU does not exist, it never needs CPU-offline |
1039 | * processing. | 1039 | * processing. |
1040 | */ | 1040 | */ |
1041 | static void rcu_preempt_offline_cpu(int cpu) | 1041 | static void rcu_preempt_offline_cpu(int cpu) |
1042 | { | 1042 | { |
1043 | } | 1043 | } |
1044 | 1044 | ||
1045 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1045 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1046 | 1046 | ||
1047 | /* | 1047 | /* |
1048 | * Because preemptible RCU does not exist, it never has any callbacks | 1048 | * Because preemptible RCU does not exist, it never has any callbacks |
1049 | * to check. | 1049 | * to check. |
1050 | */ | 1050 | */ |
1051 | static void rcu_preempt_check_callbacks(int cpu) | 1051 | static void rcu_preempt_check_callbacks(int cpu) |
1052 | { | 1052 | { |
1053 | } | 1053 | } |
1054 | 1054 | ||
1055 | /* | 1055 | /* |
1056 | * Because preemptible RCU does not exist, it never has any callbacks | 1056 | * Because preemptible RCU does not exist, it never has any callbacks |
1057 | * to process. | 1057 | * to process. |
1058 | */ | 1058 | */ |
1059 | static void rcu_preempt_process_callbacks(void) | 1059 | static void rcu_preempt_process_callbacks(void) |
1060 | { | 1060 | { |
1061 | } | 1061 | } |
1062 | 1062 | ||
1063 | /* | 1063 | /* |
1064 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 1064 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
1065 | * Because preemptible RCU does not exist, map to rcu-sched. | 1065 | * Because preemptible RCU does not exist, map to rcu-sched. |
1066 | */ | 1066 | */ |
1067 | void synchronize_rcu_expedited(void) | 1067 | void synchronize_rcu_expedited(void) |
1068 | { | 1068 | { |
1069 | synchronize_sched_expedited(); | 1069 | synchronize_sched_expedited(); |
1070 | } | 1070 | } |
1071 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 1071 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
1072 | 1072 | ||
1073 | #ifdef CONFIG_HOTPLUG_CPU | 1073 | #ifdef CONFIG_HOTPLUG_CPU |
1074 | 1074 | ||
1075 | /* | 1075 | /* |
1076 | * Because preemptible RCU does not exist, there is never any need to | 1076 | * Because preemptible RCU does not exist, there is never any need to |
1077 | * report on tasks preempted in RCU read-side critical sections during | 1077 | * report on tasks preempted in RCU read-side critical sections during |
1078 | * expedited RCU grace periods. | 1078 | * expedited RCU grace periods. |
1079 | */ | 1079 | */ |
1080 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 1080 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
1081 | bool wake) | 1081 | bool wake) |
1082 | { | 1082 | { |
1083 | } | 1083 | } |
1084 | 1084 | ||
1085 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1085 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1086 | 1086 | ||
1087 | /* | 1087 | /* |
1088 | * Because preemptible RCU does not exist, it never has any work to do. | 1088 | * Because preemptible RCU does not exist, it never has any work to do. |
1089 | */ | 1089 | */ |
1090 | static int rcu_preempt_pending(int cpu) | 1090 | static int rcu_preempt_pending(int cpu) |
1091 | { | 1091 | { |
1092 | return 0; | 1092 | return 0; |
1093 | } | 1093 | } |
1094 | 1094 | ||
1095 | /* | 1095 | /* |
1096 | * Because preemptible RCU does not exist, it never needs any CPU. | 1096 | * Because preemptible RCU does not exist, it never needs any CPU. |
1097 | */ | 1097 | */ |
1098 | static int rcu_preempt_needs_cpu(int cpu) | 1098 | static int rcu_preempt_needs_cpu(int cpu) |
1099 | { | 1099 | { |
1100 | return 0; | 1100 | return 0; |
1101 | } | 1101 | } |
1102 | 1102 | ||
1103 | /* | 1103 | /* |
1104 | * Because preemptible RCU does not exist, rcu_barrier() is just | 1104 | * Because preemptible RCU does not exist, rcu_barrier() is just |
1105 | * another name for rcu_barrier_sched(). | 1105 | * another name for rcu_barrier_sched(). |
1106 | */ | 1106 | */ |
1107 | void rcu_barrier(void) | 1107 | void rcu_barrier(void) |
1108 | { | 1108 | { |
1109 | rcu_barrier_sched(); | 1109 | rcu_barrier_sched(); |
1110 | } | 1110 | } |
1111 | EXPORT_SYMBOL_GPL(rcu_barrier); | 1111 | EXPORT_SYMBOL_GPL(rcu_barrier); |
1112 | 1112 | ||
1113 | /* | 1113 | /* |
1114 | * Because preemptible RCU does not exist, there is no per-CPU | 1114 | * Because preemptible RCU does not exist, there is no per-CPU |
1115 | * data to initialize. | 1115 | * data to initialize. |
1116 | */ | 1116 | */ |
1117 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | 1117 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) |
1118 | { | 1118 | { |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | /* | 1121 | /* |
1122 | * Because there is no preemptible RCU, there are no callbacks to move. | 1122 | * Because there is no preemptible RCU, there are no callbacks to move. |
1123 | */ | 1123 | */ |
1124 | static void rcu_preempt_send_cbs_to_online(void) | 1124 | static void rcu_preempt_send_cbs_to_online(void) |
1125 | { | 1125 | { |
1126 | } | 1126 | } |
1127 | 1127 | ||
1128 | /* | 1128 | /* |
1129 | * Because preemptible RCU does not exist, it need not be initialized. | 1129 | * Because preemptible RCU does not exist, it need not be initialized. |
1130 | */ | 1130 | */ |
1131 | static void __init __rcu_init_preempt(void) | 1131 | static void __init __rcu_init_preempt(void) |
1132 | { | 1132 | { |
1133 | } | 1133 | } |
1134 | 1134 | ||
1135 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1135 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1136 | 1136 | ||
1137 | #ifdef CONFIG_RCU_BOOST | 1137 | #ifdef CONFIG_RCU_BOOST |
1138 | 1138 | ||
1139 | #include "rtmutex_common.h" | 1139 | #include "rtmutex_common.h" |
1140 | 1140 | ||
1141 | #ifdef CONFIG_RCU_TRACE | 1141 | #ifdef CONFIG_RCU_TRACE |
1142 | 1142 | ||
1143 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) | 1143 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) |
1144 | { | 1144 | { |
1145 | if (list_empty(&rnp->blkd_tasks)) | 1145 | if (list_empty(&rnp->blkd_tasks)) |
1146 | rnp->n_balk_blkd_tasks++; | 1146 | rnp->n_balk_blkd_tasks++; |
1147 | else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) | 1147 | else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) |
1148 | rnp->n_balk_exp_gp_tasks++; | 1148 | rnp->n_balk_exp_gp_tasks++; |
1149 | else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) | 1149 | else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) |
1150 | rnp->n_balk_boost_tasks++; | 1150 | rnp->n_balk_boost_tasks++; |
1151 | else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) | 1151 | else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) |
1152 | rnp->n_balk_notblocked++; | 1152 | rnp->n_balk_notblocked++; |
1153 | else if (rnp->gp_tasks != NULL && | 1153 | else if (rnp->gp_tasks != NULL && |
1154 | ULONG_CMP_LT(jiffies, rnp->boost_time)) | 1154 | ULONG_CMP_LT(jiffies, rnp->boost_time)) |
1155 | rnp->n_balk_notyet++; | 1155 | rnp->n_balk_notyet++; |
1156 | else | 1156 | else |
1157 | rnp->n_balk_nos++; | 1157 | rnp->n_balk_nos++; |
1158 | } | 1158 | } |
1159 | 1159 | ||
1160 | #else /* #ifdef CONFIG_RCU_TRACE */ | 1160 | #else /* #ifdef CONFIG_RCU_TRACE */ |
1161 | 1161 | ||
1162 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) | 1162 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) |
1163 | { | 1163 | { |
1164 | } | 1164 | } |
1165 | 1165 | ||
1166 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | 1166 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ |
1167 | 1167 | ||
1168 | static struct lock_class_key rcu_boost_class; | 1168 | static struct lock_class_key rcu_boost_class; |
1169 | 1169 | ||
1170 | /* | 1170 | /* |
1171 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | 1171 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks |
1172 | * or ->boost_tasks, advancing the pointer to the next task in the | 1172 | * or ->boost_tasks, advancing the pointer to the next task in the |
1173 | * ->blkd_tasks list. | 1173 | * ->blkd_tasks list. |
1174 | * | 1174 | * |
1175 | * Note that irqs must be enabled: boosting the task can block. | 1175 | * Note that irqs must be enabled: boosting the task can block. |
1176 | * Returns 1 if there are more tasks needing to be boosted. | 1176 | * Returns 1 if there are more tasks needing to be boosted. |
1177 | */ | 1177 | */ |
1178 | static int rcu_boost(struct rcu_node *rnp) | 1178 | static int rcu_boost(struct rcu_node *rnp) |
1179 | { | 1179 | { |
1180 | unsigned long flags; | 1180 | unsigned long flags; |
1181 | struct rt_mutex mtx; | 1181 | struct rt_mutex mtx; |
1182 | struct task_struct *t; | 1182 | struct task_struct *t; |
1183 | struct list_head *tb; | 1183 | struct list_head *tb; |
1184 | 1184 | ||
1185 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) | 1185 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) |
1186 | return 0; /* Nothing left to boost. */ | 1186 | return 0; /* Nothing left to boost. */ |
1187 | 1187 | ||
1188 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1188 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1189 | 1189 | ||
1190 | /* | 1190 | /* |
1191 | * Recheck under the lock: all tasks in need of boosting | 1191 | * Recheck under the lock: all tasks in need of boosting |
1192 | * might exit their RCU read-side critical sections on their own. | 1192 | * might exit their RCU read-side critical sections on their own. |
1193 | */ | 1193 | */ |
1194 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { | 1194 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { |
1195 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1195 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1196 | return 0; | 1196 | return 0; |
1197 | } | 1197 | } |
1198 | 1198 | ||
1199 | /* | 1199 | /* |
1200 | * Preferentially boost tasks blocking expedited grace periods. | 1200 | * Preferentially boost tasks blocking expedited grace periods. |
1201 | * This cannot starve the normal grace periods because a second | 1201 | * This cannot starve the normal grace periods because a second |
1202 | * expedited grace period must boost all blocked tasks, including | 1202 | * expedited grace period must boost all blocked tasks, including |
1203 | * those blocking the pre-existing normal grace period. | 1203 | * those blocking the pre-existing normal grace period. |
1204 | */ | 1204 | */ |
1205 | if (rnp->exp_tasks != NULL) { | 1205 | if (rnp->exp_tasks != NULL) { |
1206 | tb = rnp->exp_tasks; | 1206 | tb = rnp->exp_tasks; |
1207 | rnp->n_exp_boosts++; | 1207 | rnp->n_exp_boosts++; |
1208 | } else { | 1208 | } else { |
1209 | tb = rnp->boost_tasks; | 1209 | tb = rnp->boost_tasks; |
1210 | rnp->n_normal_boosts++; | 1210 | rnp->n_normal_boosts++; |
1211 | } | 1211 | } |
1212 | rnp->n_tasks_boosted++; | 1212 | rnp->n_tasks_boosted++; |
1213 | 1213 | ||
1214 | /* | 1214 | /* |
1215 | * We boost task t by manufacturing an rt_mutex that appears to | 1215 | * We boost task t by manufacturing an rt_mutex that appears to |
1216 | * be held by task t. We leave a pointer to that rt_mutex where | 1216 | * be held by task t. We leave a pointer to that rt_mutex where |
1217 | * task t can find it, and task t will release the mutex when it | 1217 | * task t can find it, and task t will release the mutex when it |
1218 | * exits its outermost RCU read-side critical section. Then | 1218 | * exits its outermost RCU read-side critical section. Then |
1219 | * simply acquiring this artificial rt_mutex will boost task | 1219 | * simply acquiring this artificial rt_mutex will boost task |
1220 | * t's priority. (Thanks to tglx for suggesting this approach!) | 1220 | * t's priority. (Thanks to tglx for suggesting this approach!) |
1221 | * | 1221 | * |
1222 | * Note that task t must acquire rnp->lock to remove itself from | 1222 | * Note that task t must acquire rnp->lock to remove itself from |
1223 | * the ->blkd_tasks list, which it will do from exit() if from | 1223 | * the ->blkd_tasks list, which it will do from exit() if from |
1224 | * nowhere else. We therefore are guaranteed that task t will | 1224 | * nowhere else. We therefore are guaranteed that task t will |
1225 | * stay around at least until we drop rnp->lock. Note that | 1225 | * stay around at least until we drop rnp->lock. Note that |
1226 | * rnp->lock also resolves races between our priority boosting | 1226 | * rnp->lock also resolves races between our priority boosting |
1227 | * and task t's exiting its outermost RCU read-side critical | 1227 | * and task t's exiting its outermost RCU read-side critical |
1228 | * section. | 1228 | * section. |
1229 | */ | 1229 | */ |
1230 | t = container_of(tb, struct task_struct, rcu_node_entry); | 1230 | t = container_of(tb, struct task_struct, rcu_node_entry); |
1231 | rt_mutex_init_proxy_locked(&mtx, t); | 1231 | rt_mutex_init_proxy_locked(&mtx, t); |
1232 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ | 1232 | /* Avoid lockdep false positives. This rt_mutex is its own thing. */ |
1233 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, | 1233 | lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class, |
1234 | "rcu_boost_mutex"); | 1234 | "rcu_boost_mutex"); |
1235 | t->rcu_boost_mutex = &mtx; | 1235 | t->rcu_boost_mutex = &mtx; |
1236 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1236 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1237 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | 1237 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ |
1238 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | 1238 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ |
1239 | 1239 | ||
1240 | return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; | 1240 | return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; |
1241 | } | 1241 | } |
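
The proxy rt_mutex trick works because rt_mutex implements priority inheritance: while a high-priority task waits on the lock, the apparent holder runs at the waiter's priority until it releases it. Purely as an illustrative user-space analogue (not kernel code, not part of this file, and needing permission to use real-time scheduling to show the effect), POSIX exposes the same mechanism through PTHREAD_PRIO_INHERIT:

	/* Minimal sketch: create a priority-inheritance mutex. */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t pi_lock;

	int main(void)
	{
		pthread_mutexattr_t attr;

		pthread_mutexattr_init(&attr);
		pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
		pthread_mutex_init(&pi_lock, &attr);

		/* While a SCHED_FIFO thread blocks in pthread_mutex_lock(&pi_lock),
		 * the current holder runs at that waiter's priority until it
		 * unlocks -- the same effect rcu_boost() obtains by making
		 * task t appear to hold the proxy rt_mutex. */
		pthread_mutex_lock(&pi_lock);
		pthread_mutex_unlock(&pi_lock);

		pthread_mutex_destroy(&pi_lock);
		pthread_mutexattr_destroy(&attr);
		puts("PI mutex created and exercised");
		return 0;
	}
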
1242 | 1242 | ||
1243 | /* | 1243 | /* |
1244 | * Timer handler to initiate waking up of boost kthreads that | 1244 | * Timer handler to initiate waking up of boost kthreads that |
1245 | * have yielded the CPU due to excessive numbers of tasks to | 1245 | * have yielded the CPU due to excessive numbers of tasks to |
1246 | * boost. We wake up the per-rcu_node kthread, which in turn | 1246 | * boost. We wake up the per-rcu_node kthread, which in turn |
1247 | * will wake up the booster kthread. | 1247 | * will wake up the booster kthread. |
1248 | */ | 1248 | */ |
1249 | static void rcu_boost_kthread_timer(unsigned long arg) | 1249 | static void rcu_boost_kthread_timer(unsigned long arg) |
1250 | { | 1250 | { |
1251 | invoke_rcu_node_kthread((struct rcu_node *)arg); | 1251 | invoke_rcu_node_kthread((struct rcu_node *)arg); |
1252 | } | 1252 | } |
1253 | 1253 | ||
1254 | /* | 1254 | /* |
1255 | * Priority-boosting kthread. One per leaf rcu_node and one for the | 1255 | * Priority-boosting kthread. One per leaf rcu_node and one for the |
1256 | * root rcu_node. | 1256 | * root rcu_node. |
1257 | */ | 1257 | */ |
1258 | static int rcu_boost_kthread(void *arg) | 1258 | static int rcu_boost_kthread(void *arg) |
1259 | { | 1259 | { |
1260 | struct rcu_node *rnp = (struct rcu_node *)arg; | 1260 | struct rcu_node *rnp = (struct rcu_node *)arg; |
1261 | int spincnt = 0; | 1261 | int spincnt = 0; |
1262 | int more2boost; | 1262 | int more2boost; |
1263 | 1263 | ||
1264 | trace_rcu_utilization("Start boost kthread@init"); | 1264 | trace_rcu_utilization("Start boost kthread@init"); |
1265 | for (;;) { | 1265 | for (;;) { |
1266 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | 1266 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; |
1267 | trace_rcu_utilization("End boost kthread@rcu_wait"); | 1267 | trace_rcu_utilization("End boost kthread@rcu_wait"); |
1268 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); | 1268 | rcu_wait(rnp->boost_tasks || rnp->exp_tasks); |
1269 | trace_rcu_utilization("Start boost kthread@rcu_wait"); | 1269 | trace_rcu_utilization("Start boost kthread@rcu_wait"); |
1270 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | 1270 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; |
1271 | more2boost = rcu_boost(rnp); | 1271 | more2boost = rcu_boost(rnp); |
1272 | if (more2boost) | 1272 | if (more2boost) |
1273 | spincnt++; | 1273 | spincnt++; |
1274 | else | 1274 | else |
1275 | spincnt = 0; | 1275 | spincnt = 0; |
1276 | if (spincnt > 10) { | 1276 | if (spincnt > 10) { |
1277 | trace_rcu_utilization("End boost kthread@rcu_yield"); | 1277 | trace_rcu_utilization("End boost kthread@rcu_yield"); |
1278 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | 1278 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); |
1279 | trace_rcu_utilization("Start boost kthread@rcu_yield"); | 1279 | trace_rcu_utilization("Start boost kthread@rcu_yield"); |
1280 | spincnt = 0; | 1280 | spincnt = 0; |
1281 | } | 1281 | } |
1282 | } | 1282 | } |
1283 | /* NOTREACHED */ | 1283 | /* NOTREACHED */ |
1284 | trace_rcu_utilization("End boost kthread@notreached"); | 1284 | trace_rcu_utilization("End boost kthread@notreached"); |
1285 | return 0; | 1285 | return 0; |
1286 | } | 1286 | } |
1287 | 1287 | ||
1288 | /* | 1288 | /* |
1289 | * Check to see if it is time to start boosting RCU readers that are | 1289 | * Check to see if it is time to start boosting RCU readers that are |
1290 | * blocking the current grace period, and, if so, tell the per-rcu_node | 1290 | * blocking the current grace period, and, if so, tell the per-rcu_node |
1291 | * kthread to start boosting them. If there is an expedited grace | 1291 | * kthread to start boosting them. If there is an expedited grace |
1292 | * period in progress, it is always time to boost. | 1292 | * period in progress, it is always time to boost. |
1293 | * | 1293 | * |
1294 | * The caller must hold rnp->lock, which this function releases, | 1294 | * The caller must hold rnp->lock, which this function releases, |
1295 | * but irqs remain disabled. The ->boost_kthread_task is immortal, | 1295 | * but irqs remain disabled. The ->boost_kthread_task is immortal, |
1296 | * so we don't need to worry about it going away. | 1296 | * so we don't need to worry about it going away. |
1297 | */ | 1297 | */ |
1298 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 1298 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
1299 | { | 1299 | { |
1300 | struct task_struct *t; | 1300 | struct task_struct *t; |
1301 | 1301 | ||
1302 | if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { | 1302 | if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { |
1303 | rnp->n_balk_exp_gp_tasks++; | 1303 | rnp->n_balk_exp_gp_tasks++; |
1304 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1304 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1305 | return; | 1305 | return; |
1306 | } | 1306 | } |
1307 | if (rnp->exp_tasks != NULL || | 1307 | if (rnp->exp_tasks != NULL || |
1308 | (rnp->gp_tasks != NULL && | 1308 | (rnp->gp_tasks != NULL && |
1309 | rnp->boost_tasks == NULL && | 1309 | rnp->boost_tasks == NULL && |
1310 | rnp->qsmask == 0 && | 1310 | rnp->qsmask == 0 && |
1311 | ULONG_CMP_GE(jiffies, rnp->boost_time))) { | 1311 | ULONG_CMP_GE(jiffies, rnp->boost_time))) { |
1312 | if (rnp->exp_tasks == NULL) | 1312 | if (rnp->exp_tasks == NULL) |
1313 | rnp->boost_tasks = rnp->gp_tasks; | 1313 | rnp->boost_tasks = rnp->gp_tasks; |
1314 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1314 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1315 | t = rnp->boost_kthread_task; | 1315 | t = rnp->boost_kthread_task; |
1316 | if (t != NULL) | 1316 | if (t != NULL) |
1317 | wake_up_process(t); | 1317 | wake_up_process(t); |
1318 | } else { | 1318 | } else { |
1319 | rcu_initiate_boost_trace(rnp); | 1319 | rcu_initiate_boost_trace(rnp); |
1320 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1320 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1321 | } | 1321 | } |
1322 | } | 1322 | } |
1323 | 1323 | ||
1324 | /* | 1324 | /* |
1325 | * Wake up the per-CPU kthread to invoke RCU callbacks. | 1325 | * Wake up the per-CPU kthread to invoke RCU callbacks. |
1326 | */ | 1326 | */ |
1327 | static void invoke_rcu_callbacks_kthread(void) | 1327 | static void invoke_rcu_callbacks_kthread(void) |
1328 | { | 1328 | { |
1329 | unsigned long flags; | 1329 | unsigned long flags; |
1330 | 1330 | ||
1331 | local_irq_save(flags); | 1331 | local_irq_save(flags); |
1332 | __this_cpu_write(rcu_cpu_has_work, 1); | 1332 | __this_cpu_write(rcu_cpu_has_work, 1); |
1333 | if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | 1333 | if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && |
1334 | current != __this_cpu_read(rcu_cpu_kthread_task)) | 1334 | current != __this_cpu_read(rcu_cpu_kthread_task)) |
1335 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); | 1335 | wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); |
1336 | local_irq_restore(flags); | 1336 | local_irq_restore(flags); |
1337 | } | 1337 | } |
1338 | 1338 | ||
1339 | /* | 1339 | /* |
1340 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | 1340 | * Set the affinity of the boost kthread. The CPU-hotplug locks are |
1341 | * held, so no one should be messing with the existence of the boost | 1341 | * held, so no one should be messing with the existence of the boost |
1342 | * kthread. | 1342 | * kthread. |
1343 | */ | 1343 | */ |
1344 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | 1344 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, |
1345 | cpumask_var_t cm) | 1345 | cpumask_var_t cm) |
1346 | { | 1346 | { |
1347 | struct task_struct *t; | 1347 | struct task_struct *t; |
1348 | 1348 | ||
1349 | t = rnp->boost_kthread_task; | 1349 | t = rnp->boost_kthread_task; |
1350 | if (t != NULL) | 1350 | if (t != NULL) |
1351 | set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); | 1351 | set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); |
1352 | } | 1352 | } |
1353 | 1353 | ||
1354 | #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) | 1354 | #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) |
1355 | 1355 | ||
1356 | /* | 1356 | /* |
1357 | * Do priority-boost accounting for the start of a new grace period. | 1357 | * Do priority-boost accounting for the start of a new grace period. |
1358 | */ | 1358 | */ |
1359 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1359 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
1360 | { | 1360 | { |
1361 | rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; | 1361 | rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; |
1362 | } | 1362 | } |
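
For example, if CONFIG_RCU_BOOST_DELAY were set to 500 (milliseconds) on an HZ=250 kernel, RCU_BOOST_DELAY_JIFFIES would be DIV_ROUND_UP(500 * 250, 1000) = 125 jiffies, so ->boost_time lands 125 ticks (500 ms) after the grace period starts and boosting is not attempted before then.
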
1363 | 1363 | ||
1364 | /* | 1364 | /* |
1365 | * Create an RCU-boost kthread for the specified node if one does not | 1365 | * Create an RCU-boost kthread for the specified node if one does not |
1366 | * already exist. We only create this kthread for preemptible RCU. | 1366 | * already exist. We only create this kthread for preemptible RCU. |
1367 | * Returns zero if all is well, a negated errno otherwise. | 1367 | * Returns zero if all is well, a negated errno otherwise. |
1368 | */ | 1368 | */ |
1369 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | 1369 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, |
1370 | struct rcu_node *rnp, | 1370 | struct rcu_node *rnp, |
1371 | int rnp_index) | 1371 | int rnp_index) |
1372 | { | 1372 | { |
1373 | unsigned long flags; | 1373 | unsigned long flags; |
1374 | struct sched_param sp; | 1374 | struct sched_param sp; |
1375 | struct task_struct *t; | 1375 | struct task_struct *t; |
1376 | 1376 | ||
1377 | if (&rcu_preempt_state != rsp) | 1377 | if (&rcu_preempt_state != rsp) |
1378 | return 0; | 1378 | return 0; |
1379 | rsp->boost = 1; | 1379 | rsp->boost = 1; |
1380 | if (rnp->boost_kthread_task != NULL) | 1380 | if (rnp->boost_kthread_task != NULL) |
1381 | return 0; | 1381 | return 0; |
1382 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | 1382 | t = kthread_create(rcu_boost_kthread, (void *)rnp, |
1383 | "rcub/%d", rnp_index); | 1383 | "rcub/%d", rnp_index); |
1384 | if (IS_ERR(t)) | 1384 | if (IS_ERR(t)) |
1385 | return PTR_ERR(t); | 1385 | return PTR_ERR(t); |
1386 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1386 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1387 | rnp->boost_kthread_task = t; | 1387 | rnp->boost_kthread_task = t; |
1388 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1388 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1389 | sp.sched_priority = RCU_BOOST_PRIO; | 1389 | sp.sched_priority = RCU_BOOST_PRIO; |
1390 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1390 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1391 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1391 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
1392 | return 0; | 1392 | return 0; |
1393 | } | 1393 | } |
1394 | 1394 | ||
1395 | #ifdef CONFIG_HOTPLUG_CPU | 1395 | #ifdef CONFIG_HOTPLUG_CPU |
1396 | 1396 | ||
1397 | /* | 1397 | /* |
1398 | * Stop the RCU's per-CPU kthread when its CPU goes offline. | 1398 | * Stop the RCU's per-CPU kthread when its CPU goes offline. |
1399 | */ | 1399 | */ |
1400 | static void rcu_stop_cpu_kthread(int cpu) | 1400 | static void rcu_stop_cpu_kthread(int cpu) |
1401 | { | 1401 | { |
1402 | struct task_struct *t; | 1402 | struct task_struct *t; |
1403 | 1403 | ||
1404 | /* Stop the CPU's kthread. */ | 1404 | /* Stop the CPU's kthread. */ |
1405 | t = per_cpu(rcu_cpu_kthread_task, cpu); | 1405 | t = per_cpu(rcu_cpu_kthread_task, cpu); |
1406 | if (t != NULL) { | 1406 | if (t != NULL) { |
1407 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; | 1407 | per_cpu(rcu_cpu_kthread_task, cpu) = NULL; |
1408 | kthread_stop(t); | 1408 | kthread_stop(t); |
1409 | } | 1409 | } |
1410 | } | 1410 | } |
1411 | 1411 | ||
1412 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1412 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1413 | 1413 | ||
1414 | static void rcu_kthread_do_work(void) | 1414 | static void rcu_kthread_do_work(void) |
1415 | { | 1415 | { |
1416 | rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); | 1416 | rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); |
1417 | rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); | 1417 | rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); |
1418 | rcu_preempt_do_callbacks(); | 1418 | rcu_preempt_do_callbacks(); |
1419 | } | 1419 | } |
1420 | 1420 | ||
1421 | /* | 1421 | /* |
1422 | * Wake up the specified per-rcu_node-structure kthread. | 1422 | * Wake up the specified per-rcu_node-structure kthread. |
1423 | * Because the per-rcu_node kthreads are immortal, we don't need | 1423 | * Because the per-rcu_node kthreads are immortal, we don't need |
1424 | * to do anything to keep them alive. | 1424 | * to do anything to keep them alive. |
1425 | */ | 1425 | */ |
1426 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) | 1426 | static void invoke_rcu_node_kthread(struct rcu_node *rnp) |
1427 | { | 1427 | { |
1428 | struct task_struct *t; | 1428 | struct task_struct *t; |
1429 | 1429 | ||
1430 | t = rnp->node_kthread_task; | 1430 | t = rnp->node_kthread_task; |
1431 | if (t != NULL) | 1431 | if (t != NULL) |
1432 | wake_up_process(t); | 1432 | wake_up_process(t); |
1433 | } | 1433 | } |
1434 | 1434 | ||
1435 | /* | 1435 | /* |
1436 | * Set the specified CPU's kthread to run RT or not, as specified by | 1436 | * Set the specified CPU's kthread to run RT or not, as specified by |
1437 | * the to_rt argument. The CPU-hotplug locks are held, so the task | 1437 | * the to_rt argument. The CPU-hotplug locks are held, so the task |
1438 | * is not going away. | 1438 | * is not going away. |
1439 | */ | 1439 | */ |
1440 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | 1440 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) |
1441 | { | 1441 | { |
1442 | int policy; | 1442 | int policy; |
1443 | struct sched_param sp; | 1443 | struct sched_param sp; |
1444 | struct task_struct *t; | 1444 | struct task_struct *t; |
1445 | 1445 | ||
1446 | t = per_cpu(rcu_cpu_kthread_task, cpu); | 1446 | t = per_cpu(rcu_cpu_kthread_task, cpu); |
1447 | if (t == NULL) | 1447 | if (t == NULL) |
1448 | return; | 1448 | return; |
1449 | if (to_rt) { | 1449 | if (to_rt) { |
1450 | policy = SCHED_FIFO; | 1450 | policy = SCHED_FIFO; |
1451 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1451 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1452 | } else { | 1452 | } else { |
1453 | policy = SCHED_NORMAL; | 1453 | policy = SCHED_NORMAL; |
1454 | sp.sched_priority = 0; | 1454 | sp.sched_priority = 0; |
1455 | } | 1455 | } |
1456 | sched_setscheduler_nocheck(t, policy, &sp); | 1456 | sched_setscheduler_nocheck(t, policy, &sp); |
1457 | } | 1457 | } |
1458 | 1458 | ||
1459 | /* | 1459 | /* |
1460 | * Timer handler to initiate the waking up of per-CPU kthreads that | 1460 | * Timer handler to initiate the waking up of per-CPU kthreads that |
1461 | * have yielded the CPU due to excess numbers of RCU callbacks. | 1461 | * have yielded the CPU due to excess numbers of RCU callbacks. |
1462 | * We wake up the per-rcu_node kthread, which in turn will wake up | 1462 | * We wake up the per-rcu_node kthread, which in turn will wake up |
1463 | * the booster kthread. | 1463 | * the booster kthread. |
1464 | */ | 1464 | */ |
1465 | static void rcu_cpu_kthread_timer(unsigned long arg) | 1465 | static void rcu_cpu_kthread_timer(unsigned long arg) |
1466 | { | 1466 | { |
1467 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); | 1467 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); |
1468 | struct rcu_node *rnp = rdp->mynode; | 1468 | struct rcu_node *rnp = rdp->mynode; |
1469 | 1469 | ||
1470 | atomic_or(rdp->grpmask, &rnp->wakemask); | 1470 | atomic_or(rdp->grpmask, &rnp->wakemask); |
1471 | invoke_rcu_node_kthread(rnp); | 1471 | invoke_rcu_node_kthread(rnp); |
1472 | } | 1472 | } |
1473 | 1473 | ||
1474 | /* | 1474 | /* |
1475 | * Drop to non-real-time priority and yield, but only after posting a | 1475 | * Drop to non-real-time priority and yield, but only after posting a |
1476 | * timer that will cause us to regain our real-time priority if we | 1476 | * timer that will cause us to regain our real-time priority if we |
1477 | * remain preempted. Either way, we restore our real-time priority | 1477 | * remain preempted. Either way, we restore our real-time priority |
1478 | * before returning. | 1478 | * before returning. |
1479 | */ | 1479 | */ |
1480 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) | 1480 | static void rcu_yield(void (*f)(unsigned long), unsigned long arg) |
1481 | { | 1481 | { |
1482 | struct sched_param sp; | 1482 | struct sched_param sp; |
1483 | struct timer_list yield_timer; | 1483 | struct timer_list yield_timer; |
1484 | int prio = current->rt_priority; | 1484 | int prio = current->rt_priority; |
1485 | 1485 | ||
1486 | setup_timer_on_stack(&yield_timer, f, arg); | 1486 | setup_timer_on_stack(&yield_timer, f, arg); |
1487 | mod_timer(&yield_timer, jiffies + 2); | 1487 | mod_timer(&yield_timer, jiffies + 2); |
1488 | sp.sched_priority = 0; | 1488 | sp.sched_priority = 0; |
1489 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); | 1489 | sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); |
1490 | set_user_nice(current, 19); | 1490 | set_user_nice(current, 19); |
1491 | schedule(); | 1491 | schedule(); |
1492 | set_user_nice(current, 0); | 1492 | set_user_nice(current, 0); |
1493 | sp.sched_priority = prio; | 1493 | sp.sched_priority = prio; |
1494 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 1494 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
1495 | del_timer(&yield_timer); | 1495 | del_timer(&yield_timer); |
1496 | } | 1496 | } |
1497 | 1497 | ||
1498 | /* | 1498 | /* |
1499 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. | 1499 | * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU. |
1500 | * This can happen while the corresponding CPU is either coming online | 1500 | * This can happen while the corresponding CPU is either coming online |
1501 | * or going offline. We cannot wait until the CPU is fully online | 1501 | * or going offline. We cannot wait until the CPU is fully online |
1502 | * before starting the kthread, because the various notifier functions | 1502 | * before starting the kthread, because the various notifier functions |
1503 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until | 1503 | * can wait for RCU grace periods. So we park rcu_cpu_kthread() until |
1504 | * the corresponding CPU is online. | 1504 | * the corresponding CPU is online. |
1505 | * | 1505 | * |
1506 | * Return 1 if the kthread needs to stop, 0 otherwise. | 1506 | * Return 1 if the kthread needs to stop, 0 otherwise. |
1507 | * | 1507 | * |
1508 | * Caller must disable bh. This function can momentarily enable it. | 1508 | * Caller must disable bh. This function can momentarily enable it. |
1509 | */ | 1509 | */ |
1510 | static int rcu_cpu_kthread_should_stop(int cpu) | 1510 | static int rcu_cpu_kthread_should_stop(int cpu) |
1511 | { | 1511 | { |
1512 | while (cpu_is_offline(cpu) || | 1512 | while (cpu_is_offline(cpu) || |
1513 | !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || | 1513 | !cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu)) || |
1514 | smp_processor_id() != cpu) { | 1514 | smp_processor_id() != cpu) { |
1515 | if (kthread_should_stop()) | 1515 | if (kthread_should_stop()) |
1516 | return 1; | 1516 | return 1; |
1517 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | 1517 | per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; |
1518 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); | 1518 | per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id(); |
1519 | local_bh_enable(); | 1519 | local_bh_enable(); |
1520 | schedule_timeout_uninterruptible(1); | 1520 | schedule_timeout_uninterruptible(1); |
1521 | if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) | 1521 | if (!cpumask_equal(¤t->cpus_allowed, cpumask_of(cpu))) |
1522 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | 1522 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); |
1523 | local_bh_disable(); | 1523 | local_bh_disable(); |
1524 | } | 1524 | } |
1525 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | 1525 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; |
1526 | return 0; | 1526 | return 0; |
1527 | } | 1527 | } |
1528 | 1528 | ||
1529 | /* | 1529 | /* |
1530 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | 1530 | * Per-CPU kernel thread that invokes RCU callbacks. This replaces the |
1531 | * RCU softirq used in flavors and configurations of RCU that do not | 1531 | * RCU softirq used in flavors and configurations of RCU that do not |
1532 | * support RCU priority boosting. | 1532 | * support RCU priority boosting. |
1533 | */ | 1533 | */ |
1534 | static int rcu_cpu_kthread(void *arg) | 1534 | static int rcu_cpu_kthread(void *arg) |
1535 | { | 1535 | { |
1536 | int cpu = (int)(long)arg; | 1536 | int cpu = (int)(long)arg; |
1537 | unsigned long flags; | 1537 | unsigned long flags; |
1538 | int spincnt = 0; | 1538 | int spincnt = 0; |
1539 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); | 1539 | unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); |
1540 | char work; | 1540 | char work; |
1541 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); | 1541 | char *workp = &per_cpu(rcu_cpu_has_work, cpu); |
1542 | 1542 | ||
1543 | trace_rcu_utilization("Start CPU kthread@init"); | 1543 | trace_rcu_utilization("Start CPU kthread@init"); |
1544 | for (;;) { | 1544 | for (;;) { |
1545 | *statusp = RCU_KTHREAD_WAITING; | 1545 | *statusp = RCU_KTHREAD_WAITING; |
1546 | trace_rcu_utilization("End CPU kthread@rcu_wait"); | 1546 | trace_rcu_utilization("End CPU kthread@rcu_wait"); |
1547 | rcu_wait(*workp != 0 || kthread_should_stop()); | 1547 | rcu_wait(*workp != 0 || kthread_should_stop()); |
1548 | trace_rcu_utilization("Start CPU kthread@rcu_wait"); | 1548 | trace_rcu_utilization("Start CPU kthread@rcu_wait"); |
1549 | local_bh_disable(); | 1549 | local_bh_disable(); |
1550 | if (rcu_cpu_kthread_should_stop(cpu)) { | 1550 | if (rcu_cpu_kthread_should_stop(cpu)) { |
1551 | local_bh_enable(); | 1551 | local_bh_enable(); |
1552 | break; | 1552 | break; |
1553 | } | 1553 | } |
1554 | *statusp = RCU_KTHREAD_RUNNING; | 1554 | *statusp = RCU_KTHREAD_RUNNING; |
1555 | per_cpu(rcu_cpu_kthread_loops, cpu)++; | 1555 | per_cpu(rcu_cpu_kthread_loops, cpu)++; |
1556 | local_irq_save(flags); | 1556 | local_irq_save(flags); |
1557 | work = *workp; | 1557 | work = *workp; |
1558 | *workp = 0; | 1558 | *workp = 0; |
1559 | local_irq_restore(flags); | 1559 | local_irq_restore(flags); |
1560 | if (work) | 1560 | if (work) |
1561 | rcu_kthread_do_work(); | 1561 | rcu_kthread_do_work(); |
1562 | local_bh_enable(); | 1562 | local_bh_enable(); |
1563 | if (*workp != 0) | 1563 | if (*workp != 0) |
1564 | spincnt++; | 1564 | spincnt++; |
1565 | else | 1565 | else |
1566 | spincnt = 0; | 1566 | spincnt = 0; |
1567 | if (spincnt > 10) { | 1567 | if (spincnt > 10) { |
1568 | *statusp = RCU_KTHREAD_YIELDING; | 1568 | *statusp = RCU_KTHREAD_YIELDING; |
1569 | trace_rcu_utilization("End CPU kthread@rcu_yield"); | 1569 | trace_rcu_utilization("End CPU kthread@rcu_yield"); |
1570 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); | 1570 | rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); |
1571 | trace_rcu_utilization("Start CPU kthread@rcu_yield"); | 1571 | trace_rcu_utilization("Start CPU kthread@rcu_yield"); |
1572 | spincnt = 0; | 1572 | spincnt = 0; |
1573 | } | 1573 | } |
1574 | } | 1574 | } |
1575 | *statusp = RCU_KTHREAD_STOPPED; | 1575 | *statusp = RCU_KTHREAD_STOPPED; |
1576 | trace_rcu_utilization("End CPU kthread@term"); | 1576 | trace_rcu_utilization("End CPU kthread@term"); |
1577 | return 0; | 1577 | return 0; |
1578 | } | 1578 | } |
1579 | 1579 | ||
1580 | /* | 1580 | /* |
1581 | * Spawn a per-CPU kthread, setting up affinity and priority. | 1581 | * Spawn a per-CPU kthread, setting up affinity and priority. |
1582 | * Because the CPU hotplug lock is held, no other CPU will be attempting | 1582 | * Because the CPU hotplug lock is held, no other CPU will be attempting |
1583 | * to manipulate rcu_cpu_kthread_task. There might be another CPU | 1583 | * to manipulate rcu_cpu_kthread_task. There might be another CPU |
1584 | * attempting to access it during boot, but the locking in kthread_bind() | 1584 | * attempting to access it during boot, but the locking in kthread_bind() |
1585 | * will enforce sufficient ordering. | 1585 | * will enforce sufficient ordering. |
1586 | * | 1586 | * |
1587 | * Please note that we cannot simply refuse to wake up the per-CPU | 1587 | * Please note that we cannot simply refuse to wake up the per-CPU |
1588 | * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, | 1588 | * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state, |
1589 | * which can result in softlockup complaints if the task ends up being | 1589 | * which can result in softlockup complaints if the task ends up being |
1590 | * idle for more than a couple of minutes. | 1590 | * idle for more than a couple of minutes. |
1591 | * | 1591 | * |
1592 | * However, please note also that we cannot bind the per-CPU kthread to its | 1592 | * However, please note also that we cannot bind the per-CPU kthread to its |
1593 | * CPU until that CPU is fully online. We also cannot wait until the | 1593 | * CPU until that CPU is fully online. We also cannot wait until the |
1594 | * CPU is fully online before we create its per-CPU kthread, as this would | 1594 | * CPU is fully online before we create its per-CPU kthread, as this would |
1595 | * deadlock the system when CPU notifiers tried waiting for grace | 1595 | * deadlock the system when CPU notifiers tried waiting for grace |
1596 | * periods. So we bind the per-CPU kthread to its CPU only if the CPU | 1596 | * periods. So we bind the per-CPU kthread to its CPU only if the CPU |
1597 | * is online. If its CPU is not yet fully online, then the code in | 1597 | * is online. If its CPU is not yet fully online, then the code in |
1598 | * rcu_cpu_kthread() will wait until it is fully online, and then do | 1598 | * rcu_cpu_kthread() will wait until it is fully online, and then do |
1599 | * the binding. | 1599 | * the binding. |
1600 | */ | 1600 | */ |
1601 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) | 1601 | static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) |
1602 | { | 1602 | { |
1603 | struct sched_param sp; | 1603 | struct sched_param sp; |
1604 | struct task_struct *t; | 1604 | struct task_struct *t; |
1605 | 1605 | ||
1606 | if (!rcu_scheduler_fully_active || | 1606 | if (!rcu_scheduler_fully_active || |
1607 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) | 1607 | per_cpu(rcu_cpu_kthread_task, cpu) != NULL) |
1608 | return 0; | 1608 | return 0; |
1609 | t = kthread_create_on_node(rcu_cpu_kthread, | 1609 | t = kthread_create_on_node(rcu_cpu_kthread, |
1610 | (void *)(long)cpu, | 1610 | (void *)(long)cpu, |
1611 | cpu_to_node(cpu), | 1611 | cpu_to_node(cpu), |
1612 | "rcuc/%d", cpu); | 1612 | "rcuc/%d", cpu); |
1613 | if (IS_ERR(t)) | 1613 | if (IS_ERR(t)) |
1614 | return PTR_ERR(t); | 1614 | return PTR_ERR(t); |
1615 | if (cpu_online(cpu)) | 1615 | if (cpu_online(cpu)) |
1616 | kthread_bind(t, cpu); | 1616 | kthread_bind(t, cpu); |
1617 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; | 1617 | per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; |
1618 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); | 1618 | WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); |
1619 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1619 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1620 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1620 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1621 | per_cpu(rcu_cpu_kthread_task, cpu) = t; | 1621 | per_cpu(rcu_cpu_kthread_task, cpu) = t; |
1622 | wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ | 1622 | wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */ |
1623 | return 0; | 1623 | return 0; |
1624 | } | 1624 | } |
1625 | 1625 | ||
1626 | /* | 1626 | /* |
1627 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU | 1627 | * Per-rcu_node kthread, which is in charge of waking up the per-CPU |
1628 | * kthreads when needed. We ignore requests to wake up kthreads | 1628 | * kthreads when needed. We ignore requests to wake up kthreads |
1629 | * for offline CPUs, which is OK because force_quiescent_state() | 1629 | * for offline CPUs, which is OK because force_quiescent_state() |
1630 | * takes care of this case. | 1630 | * takes care of this case. |
1631 | */ | 1631 | */ |
1632 | static int rcu_node_kthread(void *arg) | 1632 | static int rcu_node_kthread(void *arg) |
1633 | { | 1633 | { |
1634 | int cpu; | 1634 | int cpu; |
1635 | unsigned long flags; | 1635 | unsigned long flags; |
1636 | unsigned long mask; | 1636 | unsigned long mask; |
1637 | struct rcu_node *rnp = (struct rcu_node *)arg; | 1637 | struct rcu_node *rnp = (struct rcu_node *)arg; |
1638 | struct sched_param sp; | 1638 | struct sched_param sp; |
1639 | struct task_struct *t; | 1639 | struct task_struct *t; |
1640 | 1640 | ||
1641 | for (;;) { | 1641 | for (;;) { |
1642 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; | 1642 | rnp->node_kthread_status = RCU_KTHREAD_WAITING; |
1643 | rcu_wait(atomic_read(&rnp->wakemask) != 0); | 1643 | rcu_wait(atomic_read(&rnp->wakemask) != 0); |
1644 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; | 1644 | rnp->node_kthread_status = RCU_KTHREAD_RUNNING; |
1645 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1645 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1646 | mask = atomic_xchg(&rnp->wakemask, 0); | 1646 | mask = atomic_xchg(&rnp->wakemask, 0); |
1647 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ | 1647 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ |
1648 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { | 1648 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { |
1649 | if ((mask & 0x1) == 0) | 1649 | if ((mask & 0x1) == 0) |
1650 | continue; | 1650 | continue; |
1651 | preempt_disable(); | 1651 | preempt_disable(); |
1652 | t = per_cpu(rcu_cpu_kthread_task, cpu); | 1652 | t = per_cpu(rcu_cpu_kthread_task, cpu); |
1653 | if (!cpu_online(cpu) || t == NULL) { | 1653 | if (!cpu_online(cpu) || t == NULL) { |
1654 | preempt_enable(); | 1654 | preempt_enable(); |
1655 | continue; | 1655 | continue; |
1656 | } | 1656 | } |
1657 | per_cpu(rcu_cpu_has_work, cpu) = 1; | 1657 | per_cpu(rcu_cpu_has_work, cpu) = 1; |
1658 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1658 | sp.sched_priority = RCU_KTHREAD_PRIO; |
1659 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1659 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1660 | preempt_enable(); | 1660 | preempt_enable(); |
1661 | } | 1661 | } |
1662 | } | 1662 | } |
1663 | /* NOTREACHED */ | 1663 | /* NOTREACHED */ |
1664 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; | 1664 | rnp->node_kthread_status = RCU_KTHREAD_STOPPED; |
1665 | return 0; | 1665 | return 0; |
1666 | } | 1666 | } |
1667 | 1667 | ||
1668 | /* | 1668 | /* |
1669 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | 1669 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are |
1670 | * served by the rcu_node in question. The CPU hotplug lock is still | 1670 | * served by the rcu_node in question. The CPU hotplug lock is still |
1671 | * held, so the value of rnp->qsmaskinit will be stable. | 1671 | * held, so the value of rnp->qsmaskinit will be stable. |
1672 | * | 1672 | * |
1673 | * We don't include outgoingcpu in the affinity set; use -1 if there is | 1673 | * We don't include outgoingcpu in the affinity set; use -1 if there is |
1674 | * no outgoing CPU. If there are no CPUs left in the affinity set, | 1674 | * no outgoing CPU. If there are no CPUs left in the affinity set, |
1675 | * this function allows the kthread to execute on any CPU. | 1675 | * this function allows the kthread to execute on any CPU. |
1676 | */ | 1676 | */ |
1677 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | 1677 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) |
1678 | { | 1678 | { |
1679 | cpumask_var_t cm; | 1679 | cpumask_var_t cm; |
1680 | int cpu; | 1680 | int cpu; |
1681 | unsigned long mask = rnp->qsmaskinit; | 1681 | unsigned long mask = rnp->qsmaskinit; |
1682 | 1682 | ||
1683 | if (rnp->node_kthread_task == NULL) | 1683 | if (rnp->node_kthread_task == NULL) |
1684 | return; | 1684 | return; |
1685 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) | 1685 | if (!alloc_cpumask_var(&cm, GFP_KERNEL)) |
1686 | return; | 1686 | return; |
1687 | cpumask_clear(cm); | 1687 | cpumask_clear(cm); |
1688 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) | 1688 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) |
1689 | if ((mask & 0x1) && cpu != outgoingcpu) | 1689 | if ((mask & 0x1) && cpu != outgoingcpu) |
1690 | cpumask_set_cpu(cpu, cm); | 1690 | cpumask_set_cpu(cpu, cm); |
1691 | if (cpumask_weight(cm) == 0) { | 1691 | if (cpumask_weight(cm) == 0) { |
1692 | cpumask_setall(cm); | 1692 | cpumask_setall(cm); |
1693 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) | 1693 | for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) |
1694 | cpumask_clear_cpu(cpu, cm); | 1694 | cpumask_clear_cpu(cpu, cm); |
1695 | WARN_ON_ONCE(cpumask_weight(cm) == 0); | 1695 | WARN_ON_ONCE(cpumask_weight(cm) == 0); |
1696 | } | 1696 | } |
1697 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); | 1697 | set_cpus_allowed_ptr(rnp->node_kthread_task, cm); |
1698 | rcu_boost_kthread_setaffinity(rnp, cm); | 1698 | rcu_boost_kthread_setaffinity(rnp, cm); |
1699 | free_cpumask_var(cm); | 1699 | free_cpumask_var(cm); |
1700 | } | 1700 | } |
1701 | 1701 | ||
1702 | /* | 1702 | /* |
1703 | * Spawn a per-rcu_node kthread, setting priority and affinity. | 1703 | * Spawn a per-rcu_node kthread, setting priority and affinity. |
1704 | * Called during boot before online/offline can happen, or, if | 1704 | * Called during boot before online/offline can happen, or, if |
1705 | * during runtime, with the main CPU-hotplug locks held. So only | 1705 | * during runtime, with the main CPU-hotplug locks held. So only |
1706 | * one of these can be executing at a time. | 1706 | * one of these can be executing at a time. |
1707 | */ | 1707 | */ |
1708 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, | 1708 | static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, |
1709 | struct rcu_node *rnp) | 1709 | struct rcu_node *rnp) |
1710 | { | 1710 | { |
1711 | unsigned long flags; | 1711 | unsigned long flags; |
1712 | int rnp_index = rnp - &rsp->node[0]; | 1712 | int rnp_index = rnp - &rsp->node[0]; |
1713 | struct sched_param sp; | 1713 | struct sched_param sp; |
1714 | struct task_struct *t; | 1714 | struct task_struct *t; |
1715 | 1715 | ||
1716 | if (!rcu_scheduler_fully_active || | 1716 | if (!rcu_scheduler_fully_active || |
1717 | rnp->qsmaskinit == 0) | 1717 | rnp->qsmaskinit == 0) |
1718 | return 0; | 1718 | return 0; |
1719 | if (rnp->node_kthread_task == NULL) { | 1719 | if (rnp->node_kthread_task == NULL) { |
1720 | t = kthread_create(rcu_node_kthread, (void *)rnp, | 1720 | t = kthread_create(rcu_node_kthread, (void *)rnp, |
1721 | "rcun/%d", rnp_index); | 1721 | "rcun/%d", rnp_index); |
1722 | if (IS_ERR(t)) | 1722 | if (IS_ERR(t)) |
1723 | return PTR_ERR(t); | 1723 | return PTR_ERR(t); |
1724 | raw_spin_lock_irqsave(&rnp->lock, flags); | 1724 | raw_spin_lock_irqsave(&rnp->lock, flags); |
1725 | rnp->node_kthread_task = t; | 1725 | rnp->node_kthread_task = t; |
1726 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1726 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1727 | sp.sched_priority = 99; | 1727 | sp.sched_priority = 99; |
1728 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1728 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1729 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1729 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
1730 | } | 1730 | } |
1731 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); | 1731 | return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); |
1732 | } | 1732 | } |
1733 | 1733 | ||
1734 | /* | 1734 | /* |
1735 | * Spawn all kthreads -- called as soon as the scheduler is running. | 1735 | * Spawn all kthreads -- called as soon as the scheduler is running. |
1736 | */ | 1736 | */ |
1737 | static int __init rcu_spawn_kthreads(void) | 1737 | static int __init rcu_spawn_kthreads(void) |
1738 | { | 1738 | { |
1739 | int cpu; | 1739 | int cpu; |
1740 | struct rcu_node *rnp; | 1740 | struct rcu_node *rnp; |
1741 | 1741 | ||
1742 | rcu_scheduler_fully_active = 1; | 1742 | rcu_scheduler_fully_active = 1; |
1743 | for_each_possible_cpu(cpu) { | 1743 | for_each_possible_cpu(cpu) { |
1744 | per_cpu(rcu_cpu_has_work, cpu) = 0; | 1744 | per_cpu(rcu_cpu_has_work, cpu) = 0; |
1745 | if (cpu_online(cpu)) | 1745 | if (cpu_online(cpu)) |
1746 | (void)rcu_spawn_one_cpu_kthread(cpu); | 1746 | (void)rcu_spawn_one_cpu_kthread(cpu); |
1747 | } | 1747 | } |
1748 | rnp = rcu_get_root(rcu_state); | 1748 | rnp = rcu_get_root(rcu_state); |
1749 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1749 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1750 | if (NUM_RCU_NODES > 1) { | 1750 | if (NUM_RCU_NODES > 1) { |
1751 | rcu_for_each_leaf_node(rcu_state, rnp) | 1751 | rcu_for_each_leaf_node(rcu_state, rnp) |
1752 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1752 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1753 | } | 1753 | } |
1754 | return 0; | 1754 | return 0; |
1755 | } | 1755 | } |
1756 | early_initcall(rcu_spawn_kthreads); | 1756 | early_initcall(rcu_spawn_kthreads); |
1757 | 1757 | ||
1758 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1758 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
1759 | { | 1759 | { |
1760 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); | 1760 | struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); |
1761 | struct rcu_node *rnp = rdp->mynode; | 1761 | struct rcu_node *rnp = rdp->mynode; |
1762 | 1762 | ||
1763 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ | 1763 | /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ |
1764 | if (rcu_scheduler_fully_active) { | 1764 | if (rcu_scheduler_fully_active) { |
1765 | (void)rcu_spawn_one_cpu_kthread(cpu); | 1765 | (void)rcu_spawn_one_cpu_kthread(cpu); |
1766 | if (rnp->node_kthread_task == NULL) | 1766 | if (rnp->node_kthread_task == NULL) |
1767 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); | 1767 | (void)rcu_spawn_one_node_kthread(rcu_state, rnp); |
1768 | } | 1768 | } |
1769 | } | 1769 | } |
1770 | 1770 | ||
1771 | #else /* #ifdef CONFIG_RCU_BOOST */ | 1771 | #else /* #ifdef CONFIG_RCU_BOOST */ |
1772 | 1772 | ||
1773 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | 1773 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) |
1774 | { | 1774 | { |
1775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1775 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1776 | } | 1776 | } |
1777 | 1777 | ||
1778 | static void invoke_rcu_callbacks_kthread(void) | 1778 | static void invoke_rcu_callbacks_kthread(void) |
1779 | { | 1779 | { |
1780 | WARN_ON_ONCE(1); | 1780 | WARN_ON_ONCE(1); |
1781 | } | 1781 | } |
1782 | 1782 | ||
1783 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | 1783 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) |
1784 | { | 1784 | { |
1785 | } | 1785 | } |
1786 | 1786 | ||
1787 | #ifdef CONFIG_HOTPLUG_CPU | 1787 | #ifdef CONFIG_HOTPLUG_CPU |
1788 | 1788 | ||
1789 | static void rcu_stop_cpu_kthread(int cpu) | 1789 | static void rcu_stop_cpu_kthread(int cpu) |
1790 | { | 1790 | { |
1791 | } | 1791 | } |
1792 | 1792 | ||
1793 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1793 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
1794 | 1794 | ||
1795 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | 1795 | static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) |
1796 | { | 1796 | { |
1797 | } | 1797 | } |
1798 | 1798 | ||
1799 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) | 1799 | static void rcu_cpu_kthread_setrt(int cpu, int to_rt) |
1800 | { | 1800 | { |
1801 | } | 1801 | } |
1802 | 1802 | ||
1803 | static int __init rcu_scheduler_really_started(void) | 1803 | static int __init rcu_scheduler_really_started(void) |
1804 | { | 1804 | { |
1805 | rcu_scheduler_fully_active = 1; | 1805 | rcu_scheduler_fully_active = 1; |
1806 | return 0; | 1806 | return 0; |
1807 | } | 1807 | } |
1808 | early_initcall(rcu_scheduler_really_started); | 1808 | early_initcall(rcu_scheduler_really_started); |
1809 | 1809 | ||
1810 | static void __cpuinit rcu_prepare_kthreads(int cpu) | 1810 | static void __cpuinit rcu_prepare_kthreads(int cpu) |
1811 | { | 1811 | { |
1812 | } | 1812 | } |
1813 | 1813 | ||
1814 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | 1814 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
1815 | 1815 | ||
1816 | #ifndef CONFIG_SMP | 1816 | #ifndef CONFIG_SMP |
1817 | 1817 | ||
1818 | void synchronize_sched_expedited(void) | 1818 | void synchronize_sched_expedited(void) |
1819 | { | 1819 | { |
1820 | cond_resched(); | 1820 | cond_resched(); |
1821 | } | 1821 | } |
1822 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 1822 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
1823 | 1823 | ||
1824 | #else /* #ifndef CONFIG_SMP */ | 1824 | #else /* #ifndef CONFIG_SMP */ |
1825 | 1825 | ||
1826 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | 1826 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); |
1827 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | 1827 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); |
1828 | 1828 | ||
1829 | static int synchronize_sched_expedited_cpu_stop(void *data) | 1829 | static int synchronize_sched_expedited_cpu_stop(void *data) |
1830 | { | 1830 | { |
1831 | /* | 1831 | /* |
1832 | * There must be a full memory barrier on each affected CPU | 1832 | * There must be a full memory barrier on each affected CPU |
1833 | * between the time that try_stop_cpus() is called and the | 1833 | * between the time that try_stop_cpus() is called and the |
1834 | * time that it returns. | 1834 | * time that it returns. |
1835 | * | 1835 | * |
1836 | * In the current initial implementation of cpu_stop, the | 1836 | * In the current initial implementation of cpu_stop, the |
1837 | * above condition is already met when the control reaches | 1837 | * above condition is already met when the control reaches |
1838 | * this point and the following smp_mb() is not strictly | 1838 | * this point and the following smp_mb() is not strictly |
1839 | * necessary. Do smp_mb() anyway for documentation and | 1839 | * necessary. Do smp_mb() anyway for documentation and |
1840 | * robustness against future implementation changes. | 1840 | * robustness against future implementation changes. |
1841 | */ | 1841 | */ |
1842 | smp_mb(); /* See above comment block. */ | 1842 | smp_mb(); /* See above comment block. */ |
1843 | return 0; | 1843 | return 0; |
1844 | } | 1844 | } |
1845 | 1845 | ||
1846 | /* | 1846 | /* |
1847 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | 1847 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" |
1848 | * approach to force grace period to end quickly. This consumes | 1848 | * approach to force grace period to end quickly. This consumes |
1849 | * significant time on all CPUs, and is thus not recommended for | 1849 | * significant time on all CPUs, and is thus not recommended for |
1850 | * any sort of common-case code. | 1850 | * any sort of common-case code. |
1851 | * | 1851 | * |
1852 | * Note that it is illegal to call this function while holding any | 1852 | * Note that it is illegal to call this function while holding any |
1853 | * lock that is acquired by a CPU-hotplug notifier. Failing to | 1853 | * lock that is acquired by a CPU-hotplug notifier. Failing to |
1854 | * observe this restriction will result in deadlock. | 1854 | * observe this restriction will result in deadlock. |
1855 | * | 1855 | * |
1856 | * This implementation can be thought of as an application of ticket | 1856 | * This implementation can be thought of as an application of ticket |
1857 | * locking to RCU, with sync_sched_expedited_started and | 1857 | * locking to RCU, with sync_sched_expedited_started and |
1858 | * sync_sched_expedited_done taking on the roles of the halves | 1858 | * sync_sched_expedited_done taking on the roles of the halves |
1859 | * of the ticket-lock word. Each task atomically increments | 1859 | * of the ticket-lock word. Each task atomically increments |
1860 | * sync_sched_expedited_started upon entry, snapshotting the old value, | 1860 | * sync_sched_expedited_started upon entry, snapshotting the old value, |
1861 | * then attempts to stop all the CPUs. If this succeeds, then each | 1861 | * then attempts to stop all the CPUs. If this succeeds, then each |
1862 | * CPU will have executed a context switch, resulting in an RCU-sched | 1862 | * CPU will have executed a context switch, resulting in an RCU-sched |
1863 | * grace period. We are then done, so we use atomic_cmpxchg() to | 1863 | * grace period. We are then done, so we use atomic_cmpxchg() to |
1864 | * update sync_sched_expedited_done to match our snapshot -- but | 1864 | * update sync_sched_expedited_done to match our snapshot -- but |
1865 | * only if someone else has not already advanced past our snapshot. | 1865 | * only if someone else has not already advanced past our snapshot. |
1866 | * | 1866 | * |
1867 | * On the other hand, if try_stop_cpus() fails, we check the value | 1867 | * On the other hand, if try_stop_cpus() fails, we check the value |
1868 | * of sync_sched_expedited_done. If it has advanced past our | 1868 | * of sync_sched_expedited_done. If it has advanced past our |
1869 | * initial snapshot, then someone else must have forced a grace period | 1869 | * initial snapshot, then someone else must have forced a grace period |
1870 | * some time after we took our snapshot. In this case, our work is | 1870 | * some time after we took our snapshot. In this case, our work is |
1871 | * done for us, and we can simply return. Otherwise, we try again, | 1871 | * done for us, and we can simply return. Otherwise, we try again, |
1872 | * but keep our initial snapshot for purposes of checking for someone | 1872 | * but keep our initial snapshot for purposes of checking for someone |
1873 | * doing our work for us. | 1873 | * doing our work for us. |
1874 | * | 1874 | * |
1875 | * If we fail too many times in a row, we fall back to synchronize_sched(). | 1875 | * If we fail too many times in a row, we fall back to synchronize_sched(). |
1876 | */ | 1876 | */ |
1877 | void synchronize_sched_expedited(void) | 1877 | void synchronize_sched_expedited(void) |
1878 | { | 1878 | { |
1879 | int firstsnap, s, snap, trycount = 0; | 1879 | int firstsnap, s, snap, trycount = 0; |
1880 | 1880 | ||
1881 | /* Note that atomic_inc_return() implies full memory barrier. */ | 1881 | /* Note that atomic_inc_return() implies full memory barrier. */ |
1882 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | 1882 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); |
1883 | get_online_cpus(); | 1883 | get_online_cpus(); |
1884 | 1884 | ||
1885 | /* | 1885 | /* |
1886 | * Each pass through the following loop attempts to force a | 1886 | * Each pass through the following loop attempts to force a |
1887 | * context switch on each CPU. | 1887 | * context switch on each CPU. |
1888 | */ | 1888 | */ |
1889 | while (try_stop_cpus(cpu_online_mask, | 1889 | while (try_stop_cpus(cpu_online_mask, |
1890 | synchronize_sched_expedited_cpu_stop, | 1890 | synchronize_sched_expedited_cpu_stop, |
1891 | NULL) == -EAGAIN) { | 1891 | NULL) == -EAGAIN) { |
1892 | put_online_cpus(); | 1892 | put_online_cpus(); |
1893 | 1893 | ||
1894 | /* No joy, try again later. Or just synchronize_sched(). */ | 1894 | /* No joy, try again later. Or just synchronize_sched(). */ |
1895 | if (trycount++ < 10) | 1895 | if (trycount++ < 10) |
1896 | udelay(trycount * num_online_cpus()); | 1896 | udelay(trycount * num_online_cpus()); |
1897 | else { | 1897 | else { |
1898 | synchronize_sched(); | 1898 | synchronize_sched(); |
1899 | return; | 1899 | return; |
1900 | } | 1900 | } |
1901 | 1901 | ||
1902 | /* Check to see if someone else did our work for us. */ | 1902 | /* Check to see if someone else did our work for us. */ |
1903 | s = atomic_read(&sync_sched_expedited_done); | 1903 | s = atomic_read(&sync_sched_expedited_done); |
1904 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | 1904 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { |
1905 | smp_mb(); /* ensure test happens before caller kfree */ | 1905 | smp_mb(); /* ensure test happens before caller kfree */ |
1906 | return; | 1906 | return; |
1907 | } | 1907 | } |
1908 | 1908 | ||
1909 | /* | 1909 | /* |
1910 | * Refetching sync_sched_expedited_started allows later | 1910 | * Refetching sync_sched_expedited_started allows later |
1911 | * callers to piggyback on our grace period. We subtract | 1911 | * callers to piggyback on our grace period. We subtract |
1912 | * 1 to get the same token that the last incrementer got. | 1912 | * 1 to get the same token that the last incrementer got. |
1913 | * We retry after they started, so our grace period works | 1913 | * We retry after they started, so our grace period works |
1914 | * for them, and they started after our first try, so their | 1914 | * for them, and they started after our first try, so their |
1915 | * grace period works for us. | 1915 | * grace period works for us. |
1916 | */ | 1916 | */ |
1917 | get_online_cpus(); | 1917 | get_online_cpus(); |
1918 | snap = atomic_read(&sync_sched_expedited_started); | 1918 | snap = atomic_read(&sync_sched_expedited_started); |
1919 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | 1919 | smp_mb(); /* ensure read is before try_stop_cpus(). */ |
1920 | } | 1920 | } |
1921 | 1921 | ||
1922 | /* | 1922 | /* |
1923 | * Everyone up to our most recent fetch is covered by our grace | 1923 | * Everyone up to our most recent fetch is covered by our grace |
1924 | * period. Update the counter, but only if our work is still | 1924 | * period. Update the counter, but only if our work is still |
1925 | * relevant -- which it won't be if someone who started later | 1925 | * relevant -- which it won't be if someone who started later |
1926 | * than we did beat us to the punch. | 1926 | * than we did beat us to the punch. |
1927 | */ | 1927 | */ |
1928 | do { | 1928 | do { |
1929 | s = atomic_read(&sync_sched_expedited_done); | 1929 | s = atomic_read(&sync_sched_expedited_done); |
1930 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | 1930 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { |
1931 | smp_mb(); /* ensure test happens before caller kfree */ | 1931 | smp_mb(); /* ensure test happens before caller kfree */ |
1932 | break; | 1932 | break; |
1933 | } | 1933 | } |
1934 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | 1934 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); |
1935 | 1935 | ||
1936 | put_online_cpus(); | 1936 | put_online_cpus(); |
1937 | } | 1937 | } |
1938 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 1938 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
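
The started/done protocol described in the comment above can be modeled in isolation. The sketch below is a user-space simplification using C11 atomics, not the kernel implementation: force_context_switches() stands in for try_stop_cpus(), and plain signed comparisons stand in for the wraparound-safe UINT_CMP_GE().

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_int started = 0;	/* models sync_sched_expedited_started */
	static atomic_int done = 0;	/* models sync_sched_expedited_done */

	static bool force_context_switches(void)
	{
		return true;	/* pretend every CPU context-switched */
	}

	static void expedited(void)
	{
		int firstsnap, snap, s;

		/* Take a ticket; atomic_fetch_add() returns the old value. */
		firstsnap = snap = atomic_fetch_add(&started, 1) + 1;

		while (!force_context_switches()) {
			/* Someone else's grace period may already cover us. */
			if (atomic_load(&done) >= firstsnap)
				return;
			/* Refetch so our next attempt covers later arrivals too. */
			snap = atomic_load(&started);
		}

		/* Advance "done" to our snapshot unless someone went past it. */
		s = atomic_load(&done);
		while (s < snap && !atomic_compare_exchange_weak(&done, &s, snap))
			;	/* on failure, s is reloaded with the current value */
	}

	int main(void)
	{
		expedited();
		printf("started=%d done=%d\n",
		       atomic_load(&started), atomic_load(&done));
		return 0;
	}

A caller that finds "done" already at or past its first snapshot returns immediately, which is what lets concurrent callers share a single round of CPU stops.
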
1939 | 1939 | ||
1940 | #endif /* #else #ifndef CONFIG_SMP */ | 1940 | #endif /* #else #ifndef CONFIG_SMP */ |
1941 | 1941 | ||
1942 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | 1942 | #if !defined(CONFIG_RCU_FAST_NO_HZ) |
1943 | 1943 | ||
1944 | /* | 1944 | /* |
1945 | * Check to see if any future RCU-related work will need to be done | 1945 | * Check to see if any future RCU-related work will need to be done |
1946 | * by the current CPU, even if none need be done immediately, returning | 1946 | * by the current CPU, even if none need be done immediately, returning |
1947 | * 1 if so. This function is part of the RCU implementation; it is -not- | 1947 | * 1 if so. This function is part of the RCU implementation; it is -not- |
1948 | * an exported member of the RCU API. | 1948 | * an exported member of the RCU API. |
1949 | * | 1949 | * |
1950 | * Because we have preemptible RCU, just check whether this CPU needs | 1950 | * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs |
1951 | * any flavor of RCU. Do not chew up lots of CPU cycles with preemption | 1951 | * any flavor of RCU. |
1952 | * disabled in a most-likely vain attempt to cause RCU not to need this CPU. | ||
1953 | */ | 1952 | */ |
1954 | int rcu_needs_cpu(int cpu) | 1953 | int rcu_needs_cpu(int cpu) |
1955 | { | 1954 | { |
1956 | return rcu_cpu_has_callbacks(cpu); | 1955 | return rcu_cpu_has_callbacks(cpu); |
1957 | } | 1956 | } |
1958 | 1957 | ||
1959 | /* | 1958 | /* |
1959 | * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. | ||
1960 | */ | ||
1961 | static void rcu_prepare_for_idle_init(int cpu) | ||
1962 | { | ||
1963 | } | ||
1964 | |||
1965 | /* | ||
1966 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | ||
1967 | * after it. | ||
1968 | */ | ||
1969 | static void rcu_cleanup_after_idle(int cpu) | ||
1970 | { | ||
1971 | } | ||
1972 | |||
1973 | /* | ||
1960 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, | 1974 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, |
1961 | * is nothing. | 1975 | * is nothing. |
1962 | */ | 1976 | */ |
1963 | static void rcu_prepare_for_idle(int cpu) | 1977 | static void rcu_prepare_for_idle(int cpu) |
1964 | { | 1978 | { |
1965 | } | 1979 | } |
1966 | 1980 | ||
1967 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 1981 | #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
1968 | 1982 | ||
1969 | #define RCU_NEEDS_CPU_FLUSHES 5 | 1983 | #define RCU_NEEDS_CPU_FLUSHES 5 /* Allow for callback self-repost. */ |
1984 | #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ | ||
1970 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); | 1985 | static DEFINE_PER_CPU(int, rcu_dyntick_drain); |
1971 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | 1986 | static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); |
1987 | static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); | ||
1988 | static ktime_t rcu_idle_gp_wait; | ||
1972 | 1989 | ||
1973 | /* | 1990 | /* |
1974 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | 1991 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no |
1975 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | 1992 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter |
1976 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | 1993 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to |
1977 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | 1994 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed |
1978 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | 1995 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, |
1979 | * it is better to incur scheduling-clock interrupts than to spin | 1996 | * it is better to incur scheduling-clock interrupts than to spin |
1980 | * continuously for the same time duration! | 1997 | * continuously for the same time duration! |
1981 | */ | 1998 | */ |
1982 | int rcu_needs_cpu(int cpu) | 1999 | int rcu_needs_cpu(int cpu) |
1983 | { | 2000 | { |
1984 | /* If no callbacks, RCU doesn't need the CPU. */ | 2001 | /* If no callbacks, RCU doesn't need the CPU. */ |
1985 | if (!rcu_cpu_has_callbacks(cpu)) | 2002 | if (!rcu_cpu_has_callbacks(cpu)) |
1986 | return 0; | 2003 | return 0; |
1987 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ | 2004 | /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ |
1988 | return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; | 2005 | return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; |
1989 | } | 2006 | } |
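Under CONFIG_RCU_FAST_NO_HZ, the test above reduces to: RCU needs the tick only if this CPU still has callbacks queued and it already tried and failed to drain them during the current jiffy. A tiny user-space model of that decision follows; the struct and field names are illustrative stand-ins, not kernel identifiers.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-CPU kernel state. */
struct cpu_state {
        bool has_callbacks;            /* models rcu_cpu_has_callbacks(cpu) */
        unsigned long dyntick_holdoff; /* models per_cpu(rcu_dyntick_holdoff, cpu) */
};

/* Model of rcu_needs_cpu() with RCU_FAST_NO_HZ enabled. */
static int needs_cpu(const struct cpu_state *st, unsigned long now_jiffies)
{
        if (!st->has_callbacks)
                return 0;   /* nothing queued: the tick may be stopped */
        return st->dyntick_holdoff == now_jiffies; /* tried and failed this jiffy? */
}

int main(void)
{
        struct cpu_state st = { .has_callbacks = true, .dyntick_holdoff = 99 };

        printf("holdoff this jiffy:  %d\n", needs_cpu(&st, 99));  /* 1: keep tick */
        printf("holdoff has expired: %d\n", needs_cpu(&st, 100)); /* 0: may stop tick */
        return 0;
}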
1990 | 2007 | ||
1991 | /* | 2008 | /* |
2009 | * Timer handler used to force CPU to start pushing its remaining RCU | ||
2010 | * callbacks in the case where it entered dyntick-idle mode with callbacks | ||
2011 | * pending. The handler doesn't really need to do anything because the | ||
2012 | * real work is done upon re-entry to idle, or by the next scheduling-clock | ||
2013 | * interrupt should idle not be re-entered. | ||
2014 | */ | ||
2015 | static enum hrtimer_restart rcu_idle_gp_timer_func(struct hrtimer *hrtp) | ||
2016 | { | ||
2017 | trace_rcu_prep_idle("Timer"); | ||
2018 | return HRTIMER_NORESTART; | ||
2019 | } | ||
2020 | |||
2021 | /* | ||
2022 | * Initialize the timer used to pull CPUs out of dyntick-idle mode. | ||
2023 | */ | ||
2024 | static void rcu_prepare_for_idle_init(int cpu) | ||
2025 | { | ||
2026 | static int firsttime = 1; | ||
2027 | struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu); | ||
2028 | |||
2029 | hrtimer_init(hrtp, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2030 | hrtp->function = rcu_idle_gp_timer_func; | ||
2031 | if (firsttime) { | ||
2032 | unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); | ||
2033 | |||
2034 | rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); | ||
2035 | firsttime = 0; | ||
2036 | } | ||
2037 | } | ||
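The init function above converts RCU_IDLE_GP_DELAY from jiffies into a ktime interval exactly once (the firsttime latch), since the value never changes after boot. Here is a user-space sketch of that arithmetic, with HZ assumed to be 1000 and jiffies_to_usecs()/ns_to_ktime() modeled by hand rather than called.

#include <stdint.h>
#include <stdio.h>

#define HZ                1000  /* assumed scheduling-clock rate */
#define RCU_IDLE_GP_DELAY 6     /* roughly one grace period, in jiffies */

int main(void)
{
        /* jiffies_to_usecs(): one jiffy is 1000000/HZ microseconds. */
        unsigned int upj = RCU_IDLE_GP_DELAY * (1000000 / HZ);

        /* ns_to_ktime() takes nanoseconds, hence the multiply by 1000. */
        uint64_t rcu_idle_gp_wait_ns = (uint64_t)upj * 1000;

        printf("timer interval: %llu ns (%u us)\n",
               (unsigned long long)rcu_idle_gp_wait_ns, upj);
        return 0;
}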
2038 | |||
2039 | /* | ||
2040 | * Clean up for exit from idle. Because we are exiting from idle, there | ||
2041 | * is no longer any point to rcu_idle_gp_timer, so cancel it. This will | ||
2042 | * do nothing if this timer is not active, so just cancel it unconditionally. | ||
2043 | */ | ||
2044 | static void rcu_cleanup_after_idle(int cpu) | ||
2045 | { | ||
2046 | hrtimer_cancel(&per_cpu(rcu_idle_gp_timer, cpu)); | ||
2047 | } | ||
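Arming the timer on the way into idle and cancelling it unconditionally on the way out form a simple pair: if the CPU leaves idle for any other reason before the deadline, the pending wakeup is dropped rather than firing spuriously. A minimal user-space model of that pairing, with a boolean standing in for the hrtimer (which, like hrtimer_cancel(), is harmless to clear when not armed):

#include <stdbool.h>
#include <stdio.h>

static bool idle_gp_timer_armed;   /* stands in for rcu_idle_gp_timer */

/* Model of arming the wakeup timer when entering idle with callbacks. */
static void enter_idle_with_callbacks(void)
{
        idle_gp_timer_armed = true;            /* models hrtimer_start() */
}

/* Model of rcu_cleanup_after_idle(): cancel unconditionally. */
static void exit_idle(void)
{
        idle_gp_timer_armed = false;           /* models hrtimer_cancel(),
                                                  a no-op if not active */
}

int main(void)
{
        enter_idle_with_callbacks();
        printf("in idle, timer armed: %d\n", idle_gp_timer_armed);     /* 1 */

        exit_idle();               /* woke early: spurious wakeup avoided */
        printf("out of idle, timer armed: %d\n", idle_gp_timer_armed);  /* 0 */

        exit_idle();               /* cancelling again is harmless */
        return 0;
}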
2048 | |||
2049 | /* | ||
1992 | * Check to see if any RCU-related work can be done by the current CPU, | 2050 | * Check to see if any RCU-related work can be done by the current CPU, |
1993 | * and if so, schedule a softirq to get it done. This function is part | 2051 | * and if so, schedule a softirq to get it done. This function is part |
1994 | * of the RCU implementation; it is -not- an exported member of the RCU API. | 2052 | * of the RCU implementation; it is -not- an exported member of the RCU API. |
1995 | * | 2053 | * |
1996 | * The idea is for the current CPU to clear out all work required by the | 2054 | * The idea is for the current CPU to clear out all work required by the |
1997 | * RCU core for the current grace period, so that this CPU can be permitted | 2055 | * RCU core for the current grace period, so that this CPU can be permitted |
1998 | * to enter dyntick-idle mode. In some cases, it will need to be awakened | 2056 | * to enter dyntick-idle mode. In some cases, it will need to be awakened |
1999 | * at the end of the grace period by whatever CPU ends the grace period. | 2057 | * at the end of the grace period by whatever CPU ends the grace period. |
2000 | * This allows CPUs to go dyntick-idle more quickly, and to reduce the | 2058 | * This allows CPUs to go dyntick-idle more quickly, and to reduce the |
2001 | * number of wakeups by a modest integer factor. | 2059 | * number of wakeups by a modest integer factor. |
2002 | * | 2060 | * |
2003 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 2061 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
2004 | * disabled, we do one pass of force_quiescent_state(), then do an | 2062 | * disabled, we do one pass of force_quiescent_state(), then do an |
2005 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | 2063 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked |
2006 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. | 2064 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
2007 | * | 2065 | * |
2008 | * The caller must have disabled interrupts. | 2066 | * The caller must have disabled interrupts. |
2009 | */ | 2067 | */ |
2010 | static void rcu_prepare_for_idle(int cpu) | 2068 | static void rcu_prepare_for_idle(int cpu) |
2011 | { | 2069 | { |
2012 | unsigned long flags; | 2070 | unsigned long flags; |
2013 | 2071 | ||
2014 | local_irq_save(flags); | 2072 | local_irq_save(flags); |
2015 | 2073 | ||
2016 | /* | 2074 | /* |
2017 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 2075 | * If there are no callbacks on this CPU, enter dyntick-idle mode. |
2018 | * Also reset state to avoid prejudicing later attempts. | 2076 | * Also reset state to avoid prejudicing later attempts. |
2019 | */ | 2077 | */ |
2020 | if (!rcu_cpu_has_callbacks(cpu)) { | 2078 | if (!rcu_cpu_has_callbacks(cpu)) { |
2021 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 2079 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
2022 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 2080 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
2023 | local_irq_restore(flags); | 2081 | local_irq_restore(flags); |
2024 | trace_rcu_prep_idle("No callbacks"); | 2082 | trace_rcu_prep_idle("No callbacks"); |
2025 | return; | 2083 | return; |
2026 | } | 2084 | } |
2027 | 2085 | ||
2028 | /* | 2086 | /* |
2029 | * If in holdoff mode, just return. We will presumably have | 2087 | * If in holdoff mode, just return. We will presumably have |
2030 | * refrained from disabling the scheduling-clock tick. | 2088 | * refrained from disabling the scheduling-clock tick. |
2031 | */ | 2089 | */ |
2032 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { | 2090 | if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { |
2033 | local_irq_restore(flags); | 2091 | local_irq_restore(flags); |
2034 | trace_rcu_prep_idle("In holdoff"); | 2092 | trace_rcu_prep_idle("In holdoff"); |
2035 | return; | 2093 | return; |
2036 | } | 2094 | } |
2037 | 2095 | ||
2038 | /* Check and update the rcu_dyntick_drain sequencing. */ | 2096 | /* Check and update the rcu_dyntick_drain sequencing. */ |
2039 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2097 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2040 | /* First time through, initialize the counter. */ | 2098 | /* First time through, initialize the counter. */ |
2041 | per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; | 2099 | per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; |
2042 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 2100 | } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
2101 | /* Can we go dyntick-idle despite still having callbacks? */ | ||
2102 | if (!rcu_pending(cpu)) { | ||
2103 | trace_rcu_prep_idle("Dyntick with callbacks"); | ||
2104 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | ||
2105 | hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), | ||
2106 | rcu_idle_gp_wait, HRTIMER_MODE_REL); | ||
2107 | return; /* Nothing more to do immediately. */ | ||
2108 | } | ||
2109 | |||
2043 | /* We have hit the limit, so time to give up. */ | 2110 | /* We have hit the limit, so time to give up. */ |
2044 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; | 2111 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; |
2045 | local_irq_restore(flags); | 2112 | local_irq_restore(flags); |
2046 | trace_rcu_prep_idle("Begin holdoff"); | 2113 | trace_rcu_prep_idle("Begin holdoff"); |
2047 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | 2114 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ |
2048 | return; | 2115 | return; |
2049 | } | 2116 | } |
2050 | 2117 | ||
2051 | /* | 2118 | /* |
2052 | * Do one step of pushing the remaining RCU callbacks through | 2119 | * Do one step of pushing the remaining RCU callbacks through |
2053 | * the RCU core state machine. | 2120 | * the RCU core state machine. |
2054 | */ | 2121 | */ |
2055 | #ifdef CONFIG_TREE_PREEMPT_RCU | 2122 | #ifdef CONFIG_TREE_PREEMPT_RCU |
2056 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | 2123 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { |
2057 | local_irq_restore(flags); | 2124 | local_irq_restore(flags); |
2058 | rcu_preempt_qs(cpu); | 2125 | rcu_preempt_qs(cpu); |
2059 | force_quiescent_state(&rcu_preempt_state, 0); | 2126 | force_quiescent_state(&rcu_preempt_state, 0); |
2060 | local_irq_save(flags); | 2127 | local_irq_save(flags); |
2061 | } | 2128 | } |
2062 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 2129 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
2063 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | 2130 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { |
2064 | local_irq_restore(flags); | 2131 | local_irq_restore(flags); |
2065 | rcu_sched_qs(cpu); | 2132 | rcu_sched_qs(cpu); |
2066 | force_quiescent_state(&rcu_sched_state, 0); | 2133 | force_quiescent_state(&rcu_sched_state, 0); |
2067 | local_irq_save(flags); | 2134 | local_irq_save(flags); |
2068 | } | 2135 | } |
2069 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | 2136 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { |
2070 | local_irq_restore(flags); | 2137 | local_irq_restore(flags); |
2071 | rcu_bh_qs(cpu); | 2138 | rcu_bh_qs(cpu); |
2072 | force_quiescent_state(&rcu_bh_state, 0); | 2139 | force_quiescent_state(&rcu_bh_state, 0); |
2073 | local_irq_save(flags); | 2140 | local_irq_save(flags); |
2074 | } | 2141 | } |
2075 | 2142 | ||
2076 | /* | 2143 | /* |
2077 | * If RCU callbacks are still pending, RCU still needs this CPU. | 2144 | * If RCU callbacks are still pending, RCU still needs this CPU. |
2078 | * So try forcing the callbacks through the grace period. | 2145 | * So try forcing the callbacks through the grace period. |
2079 | */ | 2146 | */ |
2080 | if (rcu_cpu_has_callbacks(cpu)) { | 2147 | if (rcu_cpu_has_callbacks(cpu)) { |
2081 | local_irq_restore(flags); | 2148 | local_irq_restore(flags); |
2082 | trace_rcu_prep_idle("More callbacks"); | 2149 | trace_rcu_prep_idle("More callbacks"); |
2083 | invoke_rcu_core(); | 2150 | invoke_rcu_core(); |
2084 | } else { | 2151 | } else { |
2085 | local_irq_restore(flags); | 2152 | local_irq_restore(flags); |
2086 | trace_rcu_prep_idle("Callbacks drained"); | 2153 | trace_rcu_prep_idle("Callbacks drained"); |
2087 | } | 2154 | } |
2088 | } | 2155 | } |
2089 | 2156 | ||
2090 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ | 2157 | #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ |
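Putting the pieces together, rcu_prepare_for_idle() makes one of four choices each time the CPU heads toward idle: allow idle immediately (no callbacks), keep the tick (holdoff for this jiffy), allow idle with the backstop timer (callbacks still queued but rcu_pending() == 0 once the drain budget is exhausted), or push callbacks through for another pass. The user-space sketch below condenses that decision sequence into a pure function; the enum, struct, and field names are illustrative, not kernel identifiers, and the force_quiescent_state()/invoke_rcu_core() work is collapsed into a single PUSH_CALLBACKS action.

#include <stdbool.h>
#include <stdio.h>

#define RCU_NEEDS_CPU_FLUSHES 5

enum idle_action {
        IDLE_OK,          /* no callbacks: stop the tick */
        IDLE_HOLDOFF,     /* gave up this jiffy: keep the tick */
        IDLE_WITH_TIMER,  /* callbacks queued but nothing to do now:
                             stop the tick, arm the wakeup timer */
        PUSH_CALLBACKS,   /* one more pass through the RCU core */
};

struct cpu_model {
        bool has_callbacks;     /* models rcu_cpu_has_callbacks() */
        bool rcu_pending;       /* models rcu_pending() */
        int dyntick_drain;      /* per-CPU drain budget */
        unsigned long holdoff;  /* jiffy of the last give-up */
};

static enum idle_action prepare_for_idle(struct cpu_model *c, unsigned long jiffies)
{
        if (!c->has_callbacks) {
                c->holdoff = jiffies - 1;  /* reset state for later attempts */
                c->dyntick_drain = 0;
                return IDLE_OK;
        }
        if (c->holdoff == jiffies)
                return IDLE_HOLDOFF;
        if (c->dyntick_drain <= 0) {
                c->dyntick_drain = RCU_NEEDS_CPU_FLUSHES;   /* first attempt */
        } else if (--c->dyntick_drain <= 0) {
                if (!c->rcu_pending) {
                        c->holdoff = jiffies - 1;
                        return IDLE_WITH_TIMER;             /* the new case */
                }
                c->holdoff = jiffies;                       /* give up */
                return IDLE_HOLDOFF;
        }
        return PUSH_CALLBACKS;
}

int main(void)
{
        struct cpu_model c = { .has_callbacks = true, .rcu_pending = false };
        unsigned long j = 1000;

        /* Repeated idle attempts within one jiffy: the budget counts down,
         * then the CPU is allowed to sleep with the backstop timer armed. */
        for (int i = 0; i < RCU_NEEDS_CPU_FLUSHES + 1; i++)
                printf("attempt %d -> action %d\n", i, prepare_for_idle(&c, j));
        return 0;
}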