Commit 2439b696cb5303f1eeb6aeebcee19e0056c3dd6e

Authored by Paul E. McKenney
1 parent 7807acdb6b

rcu: Shrink TINY_RCU by moving exit_rcu()

Now that TINY_PREEMPT_RCU is no more, exit_rcu() is always an empty
function.  But if TINY_RCU is going to have an empty function, it should
be in include/linux/rcutiny.h, where it does not bloat the kernel.
This commit therefore moves exit_rcu() out of kernel/rcupdate.c to
kernel/rcutree_plugin.h, and places a static inline empty function in
include/linux/rcutiny.h in order to shrink TINY_RCU a bit.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>

Showing 5 changed files with 33 additions and 27 deletions (inline diff)
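For orientation before the per-file hunks: the only change to include/linux/rcupdate.h itself is the removal of the common extern declaration of exit_rcu() (the two lines marked "-" in the hunk below). The sketch that follows is reconstructed from the commit message, not copied from the patch; the rcutiny.h stub is stated there explicitly, while the placement of the remaining declaration and of the non-empty body is only summarized here.

/*
 * Sketch of the intended split (illustration only; see the commit's
 * rcutiny.h and rcutree_plugin.h hunks for the authoritative code).
 */
#ifdef CONFIG_TINY_RCU
/* TINY_RCU: an empty static inline in include/linux/rcutiny.h adds no code. */
static inline void exit_rcu(void)
{
}
#else
/*
 * Preemptible tree RCU: the non-empty implementation moves from
 * kernel/rcupdate.c into kernel/rcutree_plugin.h, so the declaration
 * no longer needs to live in the common include/linux/rcupdate.h.
 */
extern void exit_rcu(void);
#endif

Because the TINY_RCU variant is a static inline in the header, calls to it compile away entirely, which is where the size saving comes from.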

include/linux/rcupdate.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * 21 *
22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 22 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 23 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
24 * Papers: 24 * Papers:
25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf 25 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 26 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
27 * 27 *
28 * For detailed explanation of Read-Copy Update mechanism see - 28 * For detailed explanation of Read-Copy Update mechanism see -
29 * http://lse.sourceforge.net/locking/rcupdate.html 29 * http://lse.sourceforge.net/locking/rcupdate.html
30 * 30 *
31 */ 31 */
32 32
33 #ifndef __LINUX_RCUPDATE_H 33 #ifndef __LINUX_RCUPDATE_H
34 #define __LINUX_RCUPDATE_H 34 #define __LINUX_RCUPDATE_H
35 35
36 #include <linux/types.h> 36 #include <linux/types.h>
37 #include <linux/cache.h> 37 #include <linux/cache.h>
38 #include <linux/spinlock.h> 38 #include <linux/spinlock.h>
39 #include <linux/threads.h> 39 #include <linux/threads.h>
40 #include <linux/cpumask.h> 40 #include <linux/cpumask.h>
41 #include <linux/seqlock.h> 41 #include <linux/seqlock.h>
42 #include <linux/lockdep.h> 42 #include <linux/lockdep.h>
43 #include <linux/completion.h> 43 #include <linux/completion.h>
44 #include <linux/debugobjects.h> 44 #include <linux/debugobjects.h>
45 #include <linux/bug.h> 45 #include <linux/bug.h>
46 #include <linux/compiler.h> 46 #include <linux/compiler.h>
47 47
48 #ifdef CONFIG_RCU_TORTURE_TEST 48 #ifdef CONFIG_RCU_TORTURE_TEST
49 extern int rcutorture_runnable; /* for sysctl */ 49 extern int rcutorture_runnable; /* for sysctl */
50 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 50 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
51 51
52 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 52 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
53 extern void rcutorture_record_test_transition(void); 53 extern void rcutorture_record_test_transition(void);
54 extern void rcutorture_record_progress(unsigned long vernum); 54 extern void rcutorture_record_progress(unsigned long vernum);
55 extern void do_trace_rcu_torture_read(char *rcutorturename, 55 extern void do_trace_rcu_torture_read(char *rcutorturename,
56 struct rcu_head *rhp, 56 struct rcu_head *rhp,
57 unsigned long secs, 57 unsigned long secs,
58 unsigned long c_old, 58 unsigned long c_old,
59 unsigned long c); 59 unsigned long c);
60 #else 60 #else
61 static inline void rcutorture_record_test_transition(void) 61 static inline void rcutorture_record_test_transition(void)
62 { 62 {
63 } 63 }
64 static inline void rcutorture_record_progress(unsigned long vernum) 64 static inline void rcutorture_record_progress(unsigned long vernum)
65 { 65 {
66 } 66 }
67 #ifdef CONFIG_RCU_TRACE 67 #ifdef CONFIG_RCU_TRACE
68 extern void do_trace_rcu_torture_read(char *rcutorturename, 68 extern void do_trace_rcu_torture_read(char *rcutorturename,
69 struct rcu_head *rhp, 69 struct rcu_head *rhp,
70 unsigned long secs, 70 unsigned long secs,
71 unsigned long c_old, 71 unsigned long c_old,
72 unsigned long c); 72 unsigned long c);
73 #else 73 #else
74 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ 74 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
75 do { } while (0) 75 do { } while (0)
76 #endif 76 #endif
77 #endif 77 #endif
78 78
79 #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b)) 79 #define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
80 #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) 80 #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))
81 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) 81 #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
82 #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) 82 #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
83 #define ulong2long(a) (*(long *)(&(a))) 83 #define ulong2long(a) (*(long *)(&(a)))
84 84
85 /* Exported common interfaces */ 85 /* Exported common interfaces */
86 86
87 #ifdef CONFIG_PREEMPT_RCU 87 #ifdef CONFIG_PREEMPT_RCU
88 88
89 /** 89 /**
90 * call_rcu() - Queue an RCU callback for invocation after a grace period. 90 * call_rcu() - Queue an RCU callback for invocation after a grace period.
91 * @head: structure to be used for queueing the RCU updates. 91 * @head: structure to be used for queueing the RCU updates.
92 * @func: actual callback function to be invoked after the grace period 92 * @func: actual callback function to be invoked after the grace period
93 * 93 *
94 * The callback function will be invoked some time after a full grace 94 * The callback function will be invoked some time after a full grace
95 * period elapses, in other words after all pre-existing RCU read-side 95 * period elapses, in other words after all pre-existing RCU read-side
96 * critical sections have completed. However, the callback function 96 * critical sections have completed. However, the callback function
97 * might well execute concurrently with RCU read-side critical sections 97 * might well execute concurrently with RCU read-side critical sections
98 * that started after call_rcu() was invoked. RCU read-side critical 98 * that started after call_rcu() was invoked. RCU read-side critical
99 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 99 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
100 * and may be nested. 100 * and may be nested.
101 * 101 *
102 * Note that all CPUs must agree that the grace period extended beyond 102 * Note that all CPUs must agree that the grace period extended beyond
103 * all pre-existing RCU read-side critical section. On systems with more 103 * all pre-existing RCU read-side critical section. On systems with more
104 * than one CPU, this means that when "func()" is invoked, each CPU is 104 * than one CPU, this means that when "func()" is invoked, each CPU is
105 * guaranteed to have executed a full memory barrier since the end of its 105 * guaranteed to have executed a full memory barrier since the end of its
106 * last RCU read-side critical section whose beginning preceded the call 106 * last RCU read-side critical section whose beginning preceded the call
107 * to call_rcu(). It also means that each CPU executing an RCU read-side 107 * to call_rcu(). It also means that each CPU executing an RCU read-side
108 * critical section that continues beyond the start of "func()" must have 108 * critical section that continues beyond the start of "func()" must have
109 * executed a memory barrier after the call_rcu() but before the beginning 109 * executed a memory barrier after the call_rcu() but before the beginning
110 * of that RCU read-side critical section. Note that these guarantees 110 * of that RCU read-side critical section. Note that these guarantees
111 * include CPUs that are offline, idle, or executing in user mode, as 111 * include CPUs that are offline, idle, or executing in user mode, as
112 * well as CPUs that are executing in the kernel. 112 * well as CPUs that are executing in the kernel.
113 * 113 *
114 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the 114 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
115 * resulting RCU callback function "func()", then both CPU A and CPU B are 115 * resulting RCU callback function "func()", then both CPU A and CPU B are
116 * guaranteed to execute a full memory barrier during the time interval 116 * guaranteed to execute a full memory barrier during the time interval
117 * between the call to call_rcu() and the invocation of "func()" -- even 117 * between the call to call_rcu() and the invocation of "func()" -- even
118 * if CPU A and CPU B are the same CPU (but again only if the system has 118 * if CPU A and CPU B are the same CPU (but again only if the system has
119 * more than one CPU). 119 * more than one CPU).
120 */ 120 */
121 extern void call_rcu(struct rcu_head *head, 121 extern void call_rcu(struct rcu_head *head,
122 void (*func)(struct rcu_head *head)); 122 void (*func)(struct rcu_head *head));
123 123
124 #else /* #ifdef CONFIG_PREEMPT_RCU */ 124 #else /* #ifdef CONFIG_PREEMPT_RCU */
125 125
126 /* In classic RCU, call_rcu() is just call_rcu_sched(). */ 126 /* In classic RCU, call_rcu() is just call_rcu_sched(). */
127 #define call_rcu call_rcu_sched 127 #define call_rcu call_rcu_sched
128 128
129 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 129 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
130 130
131 /** 131 /**
132 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. 132 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
133 * @head: structure to be used for queueing the RCU updates. 133 * @head: structure to be used for queueing the RCU updates.
134 * @func: actual callback function to be invoked after the grace period 134 * @func: actual callback function to be invoked after the grace period
135 * 135 *
136 * The callback function will be invoked some time after a full grace 136 * The callback function will be invoked some time after a full grace
137 * period elapses, in other words after all currently executing RCU 137 * period elapses, in other words after all currently executing RCU
138 * read-side critical sections have completed. call_rcu_bh() assumes 138 * read-side critical sections have completed. call_rcu_bh() assumes
139 * that the read-side critical sections end on completion of a softirq 139 * that the read-side critical sections end on completion of a softirq
140 * handler. This means that read-side critical sections in process 140 * handler. This means that read-side critical sections in process
141 * context must not be interrupted by softirqs. This interface is to be 141 * context must not be interrupted by softirqs. This interface is to be
142 * used when most of the read-side critical sections are in softirq context. 142 * used when most of the read-side critical sections are in softirq context.
143 * RCU read-side critical sections are delimited by : 143 * RCU read-side critical sections are delimited by :
144 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. 144 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
145 * OR 145 * OR
146 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. 146 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
147 * These may be nested. 147 * These may be nested.
148 * 148 *
149 * See the description of call_rcu() for more detailed information on 149 * See the description of call_rcu() for more detailed information on
150 * memory ordering guarantees. 150 * memory ordering guarantees.
151 */ 151 */
152 extern void call_rcu_bh(struct rcu_head *head, 152 extern void call_rcu_bh(struct rcu_head *head,
153 void (*func)(struct rcu_head *head)); 153 void (*func)(struct rcu_head *head));
154 154
155 /** 155 /**
156 * call_rcu_sched() - Queue an RCU for invocation after sched grace period. 156 * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
157 * @head: structure to be used for queueing the RCU updates. 157 * @head: structure to be used for queueing the RCU updates.
158 * @func: actual callback function to be invoked after the grace period 158 * @func: actual callback function to be invoked after the grace period
159 * 159 *
160 * The callback function will be invoked some time after a full grace 160 * The callback function will be invoked some time after a full grace
161 * period elapses, in other words after all currently executing RCU 161 * period elapses, in other words after all currently executing RCU
162 * read-side critical sections have completed. call_rcu_sched() assumes 162 * read-side critical sections have completed. call_rcu_sched() assumes
163 * that the read-side critical sections end on enabling of preemption 163 * that the read-side critical sections end on enabling of preemption
164 * or on voluntary preemption. 164 * or on voluntary preemption.
165 * RCU read-side critical sections are delimited by : 165 * RCU read-side critical sections are delimited by :
166 * - rcu_read_lock_sched() and rcu_read_unlock_sched(), 166 * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
167 * OR 167 * OR
168 * anything that disables preemption. 168 * anything that disables preemption.
169 * These may be nested. 169 * These may be nested.
170 * 170 *
171 * See the description of call_rcu() for more detailed information on 171 * See the description of call_rcu() for more detailed information on
172 * memory ordering guarantees. 172 * memory ordering guarantees.
173 */ 173 */
174 extern void call_rcu_sched(struct rcu_head *head, 174 extern void call_rcu_sched(struct rcu_head *head,
175 void (*func)(struct rcu_head *rcu)); 175 void (*func)(struct rcu_head *rcu));
176 176
177 extern void synchronize_sched(void); 177 extern void synchronize_sched(void);
178 178
179 #ifdef CONFIG_PREEMPT_RCU 179 #ifdef CONFIG_PREEMPT_RCU
180 180
181 extern void __rcu_read_lock(void); 181 extern void __rcu_read_lock(void);
182 extern void __rcu_read_unlock(void); 182 extern void __rcu_read_unlock(void);
183 extern void rcu_read_unlock_special(struct task_struct *t); 183 extern void rcu_read_unlock_special(struct task_struct *t);
184 void synchronize_rcu(void); 184 void synchronize_rcu(void);
185 185
186 /* 186 /*
187 * Defined as a macro as it is a very low level header included from 187 * Defined as a macro as it is a very low level header included from
188 * areas that don't even know about current. This gives the rcu_read_lock() 188 * areas that don't even know about current. This gives the rcu_read_lock()
189 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other 189 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
190 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 190 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
191 */ 191 */
192 #define rcu_preempt_depth() (current->rcu_read_lock_nesting) 192 #define rcu_preempt_depth() (current->rcu_read_lock_nesting)
193 193
194 #else /* #ifdef CONFIG_PREEMPT_RCU */ 194 #else /* #ifdef CONFIG_PREEMPT_RCU */
195 195
196 static inline void __rcu_read_lock(void) 196 static inline void __rcu_read_lock(void)
197 { 197 {
198 preempt_disable(); 198 preempt_disable();
199 } 199 }
200 200
201 static inline void __rcu_read_unlock(void) 201 static inline void __rcu_read_unlock(void)
202 { 202 {
203 preempt_enable(); 203 preempt_enable();
204 } 204 }
205 205
206 static inline void synchronize_rcu(void) 206 static inline void synchronize_rcu(void)
207 { 207 {
208 synchronize_sched(); 208 synchronize_sched();
209 } 209 }
210 210
211 static inline int rcu_preempt_depth(void) 211 static inline int rcu_preempt_depth(void)
212 { 212 {
213 return 0; 213 return 0;
214 } 214 }
215 215
216 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 216 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
217 217
218 /* Internal to kernel */ 218 /* Internal to kernel */
219 extern void rcu_init(void); 219 extern void rcu_init(void);
220 extern void rcu_sched_qs(int cpu); 220 extern void rcu_sched_qs(int cpu);
221 extern void rcu_bh_qs(int cpu); 221 extern void rcu_bh_qs(int cpu);
222 extern void rcu_check_callbacks(int cpu, int user); 222 extern void rcu_check_callbacks(int cpu, int user);
223 struct notifier_block; 223 struct notifier_block;
224 extern void rcu_idle_enter(void); 224 extern void rcu_idle_enter(void);
225 extern void rcu_idle_exit(void); 225 extern void rcu_idle_exit(void);
226 extern void rcu_irq_enter(void); 226 extern void rcu_irq_enter(void);
227 extern void rcu_irq_exit(void); 227 extern void rcu_irq_exit(void);
228 228
229 #ifdef CONFIG_RCU_USER_QS 229 #ifdef CONFIG_RCU_USER_QS
230 extern void rcu_user_enter(void); 230 extern void rcu_user_enter(void);
231 extern void rcu_user_exit(void); 231 extern void rcu_user_exit(void);
232 extern void rcu_user_enter_after_irq(void); 232 extern void rcu_user_enter_after_irq(void);
233 extern void rcu_user_exit_after_irq(void); 233 extern void rcu_user_exit_after_irq(void);
234 #else 234 #else
235 static inline void rcu_user_enter(void) { } 235 static inline void rcu_user_enter(void) { }
236 static inline void rcu_user_exit(void) { } 236 static inline void rcu_user_exit(void) { }
237 static inline void rcu_user_enter_after_irq(void) { } 237 static inline void rcu_user_enter_after_irq(void) { }
238 static inline void rcu_user_exit_after_irq(void) { } 238 static inline void rcu_user_exit_after_irq(void) { }
239 static inline void rcu_user_hooks_switch(struct task_struct *prev, 239 static inline void rcu_user_hooks_switch(struct task_struct *prev,
240 struct task_struct *next) { } 240 struct task_struct *next) { }
241 #endif /* CONFIG_RCU_USER_QS */ 241 #endif /* CONFIG_RCU_USER_QS */
242 242
243 extern void exit_rcu(void);
244
245 /** 243 /**
246 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers 244 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
247 * @a: Code that RCU needs to pay attention to. 245 * @a: Code that RCU needs to pay attention to.
248 * 246 *
249 * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden 247 * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
250 * in the inner idle loop, that is, between the rcu_idle_enter() and 248 * in the inner idle loop, that is, between the rcu_idle_enter() and
251 * the rcu_idle_exit() -- RCU will happily ignore any such read-side 249 * the rcu_idle_exit() -- RCU will happily ignore any such read-side
252 * critical sections. However, things like powertop need tracepoints 250 * critical sections. However, things like powertop need tracepoints
253 * in the inner idle loop. 251 * in the inner idle loop.
254 * 252 *
255 * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) 253 * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
256 * will tell RCU that it needs to pay attending, invoke its argument 254 * will tell RCU that it needs to pay attending, invoke its argument
257 * (in this example, a call to the do_something_with_RCU() function), 255 * (in this example, a call to the do_something_with_RCU() function),
258 * and then tell RCU to go back to ignoring this CPU. It is permissible 256 * and then tell RCU to go back to ignoring this CPU. It is permissible
259 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently 257 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
260 * quite limited. If deeper nesting is required, it will be necessary 258 * quite limited. If deeper nesting is required, it will be necessary
261 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. 259 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
262 */ 260 */
263 #define RCU_NONIDLE(a) \ 261 #define RCU_NONIDLE(a) \
264 do { \ 262 do { \
265 rcu_irq_enter(); \ 263 rcu_irq_enter(); \
266 do { a; } while (0); \ 264 do { a; } while (0); \
267 rcu_irq_exit(); \ 265 rcu_irq_exit(); \
268 } while (0) 266 } while (0)
269 267
270 /* 268 /*
271 * Infrastructure to implement the synchronize_() primitives in 269 * Infrastructure to implement the synchronize_() primitives in
272 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. 270 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
273 */ 271 */
274 272
275 typedef void call_rcu_func_t(struct rcu_head *head, 273 typedef void call_rcu_func_t(struct rcu_head *head,
276 void (*func)(struct rcu_head *head)); 274 void (*func)(struct rcu_head *head));
277 void wait_rcu_gp(call_rcu_func_t crf); 275 void wait_rcu_gp(call_rcu_func_t crf);
278 276
279 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 277 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
280 #include <linux/rcutree.h> 278 #include <linux/rcutree.h>
281 #elif defined(CONFIG_TINY_RCU) 279 #elif defined(CONFIG_TINY_RCU)
282 #include <linux/rcutiny.h> 280 #include <linux/rcutiny.h>
283 #else 281 #else
284 #error "Unknown RCU implementation specified to kernel configuration" 282 #error "Unknown RCU implementation specified to kernel configuration"
285 #endif 283 #endif
286 284
287 /* 285 /*
288 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 286 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
289 * initialization and destruction of rcu_head on the stack. rcu_head structures 287 * initialization and destruction of rcu_head on the stack. rcu_head structures
290 * allocated dynamically in the heap or defined statically don't need any 288 * allocated dynamically in the heap or defined statically don't need any
291 * initialization. 289 * initialization.
292 */ 290 */
293 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 291 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
294 extern void init_rcu_head_on_stack(struct rcu_head *head); 292 extern void init_rcu_head_on_stack(struct rcu_head *head);
295 extern void destroy_rcu_head_on_stack(struct rcu_head *head); 293 extern void destroy_rcu_head_on_stack(struct rcu_head *head);
296 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 294 #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
297 static inline void init_rcu_head_on_stack(struct rcu_head *head) 295 static inline void init_rcu_head_on_stack(struct rcu_head *head)
298 { 296 {
299 } 297 }
300 298
301 static inline void destroy_rcu_head_on_stack(struct rcu_head *head) 299 static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
302 { 300 {
303 } 301 }
304 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 302 #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
305 303
306 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) 304 #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP)
307 extern int rcu_is_cpu_idle(void); 305 extern int rcu_is_cpu_idle(void);
308 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */ 306 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SMP) */
309 307
310 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) 308 #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
311 bool rcu_lockdep_current_cpu_online(void); 309 bool rcu_lockdep_current_cpu_online(void);
312 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 310 #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
313 static inline bool rcu_lockdep_current_cpu_online(void) 311 static inline bool rcu_lockdep_current_cpu_online(void)
314 { 312 {
315 return 1; 313 return 1;
316 } 314 }
317 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 315 #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
318 316
319 #ifdef CONFIG_DEBUG_LOCK_ALLOC 317 #ifdef CONFIG_DEBUG_LOCK_ALLOC
320 318
321 static inline void rcu_lock_acquire(struct lockdep_map *map) 319 static inline void rcu_lock_acquire(struct lockdep_map *map)
322 { 320 {
323 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); 321 lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
324 } 322 }
325 323
326 static inline void rcu_lock_release(struct lockdep_map *map) 324 static inline void rcu_lock_release(struct lockdep_map *map)
327 { 325 {
328 lock_release(map, 1, _THIS_IP_); 326 lock_release(map, 1, _THIS_IP_);
329 } 327 }
330 328
331 extern struct lockdep_map rcu_lock_map; 329 extern struct lockdep_map rcu_lock_map;
332 extern struct lockdep_map rcu_bh_lock_map; 330 extern struct lockdep_map rcu_bh_lock_map;
333 extern struct lockdep_map rcu_sched_lock_map; 331 extern struct lockdep_map rcu_sched_lock_map;
334 extern int debug_lockdep_rcu_enabled(void); 332 extern int debug_lockdep_rcu_enabled(void);
335 333
336 /** 334 /**
337 * rcu_read_lock_held() - might we be in RCU read-side critical section? 335 * rcu_read_lock_held() - might we be in RCU read-side critical section?
338 * 336 *
339 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU 337 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
340 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, 338 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
341 * this assumes we are in an RCU read-side critical section unless it can 339 * this assumes we are in an RCU read-side critical section unless it can
342 * prove otherwise. This is useful for debug checks in functions that 340 * prove otherwise. This is useful for debug checks in functions that
343 * require that they be called within an RCU read-side critical section. 341 * require that they be called within an RCU read-side critical section.
344 * 342 *
345 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot 343 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
346 * and while lockdep is disabled. 344 * and while lockdep is disabled.
347 * 345 *
348 * Note that rcu_read_lock() and the matching rcu_read_unlock() must 346 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
349 * occur in the same context, for example, it is illegal to invoke 347 * occur in the same context, for example, it is illegal to invoke
350 * rcu_read_unlock() in process context if the matching rcu_read_lock() 348 * rcu_read_unlock() in process context if the matching rcu_read_lock()
351 * was invoked from within an irq handler. 349 * was invoked from within an irq handler.
352 * 350 *
353 * Note that rcu_read_lock() is disallowed if the CPU is either idle or 351 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
354 * offline from an RCU perspective, so check for those as well. 352 * offline from an RCU perspective, so check for those as well.
355 */ 353 */
356 static inline int rcu_read_lock_held(void) 354 static inline int rcu_read_lock_held(void)
357 { 355 {
358 if (!debug_lockdep_rcu_enabled()) 356 if (!debug_lockdep_rcu_enabled())
359 return 1; 357 return 1;
360 if (rcu_is_cpu_idle()) 358 if (rcu_is_cpu_idle())
361 return 0; 359 return 0;
362 if (!rcu_lockdep_current_cpu_online()) 360 if (!rcu_lockdep_current_cpu_online())
363 return 0; 361 return 0;
364 return lock_is_held(&rcu_lock_map); 362 return lock_is_held(&rcu_lock_map);
365 } 363 }
366 364
367 /* 365 /*
368 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file 366 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
369 * hell. 367 * hell.
370 */ 368 */
371 extern int rcu_read_lock_bh_held(void); 369 extern int rcu_read_lock_bh_held(void);
372 370
373 /** 371 /**
374 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? 372 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
375 * 373 *
376 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an 374 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
377 * RCU-sched read-side critical section. In absence of 375 * RCU-sched read-side critical section. In absence of
378 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side 376 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
379 * critical section unless it can prove otherwise. Note that disabling 377 * critical section unless it can prove otherwise. Note that disabling
380 * of preemption (including disabling irqs) counts as an RCU-sched 378 * of preemption (including disabling irqs) counts as an RCU-sched
381 * read-side critical section. This is useful for debug checks in functions 379 * read-side critical section. This is useful for debug checks in functions
382 * that required that they be called within an RCU-sched read-side 380 * that required that they be called within an RCU-sched read-side
383 * critical section. 381 * critical section.
384 * 382 *
385 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot 383 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
386 * and while lockdep is disabled. 384 * and while lockdep is disabled.
387 * 385 *
388 * Note that if the CPU is in the idle loop from an RCU point of 386 * Note that if the CPU is in the idle loop from an RCU point of
389 * view (ie: that we are in the section between rcu_idle_enter() and 387 * view (ie: that we are in the section between rcu_idle_enter() and
390 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU 388 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
391 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs 389 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
392 * that are in such a section, considering these as in extended quiescent 390 * that are in such a section, considering these as in extended quiescent
393 * state, so such a CPU is effectively never in an RCU read-side critical 391 * state, so such a CPU is effectively never in an RCU read-side critical
394 * section regardless of what RCU primitives it invokes. This state of 392 * section regardless of what RCU primitives it invokes. This state of
395 * affairs is required --- we need to keep an RCU-free window in idle 393 * affairs is required --- we need to keep an RCU-free window in idle
396 * where the CPU may possibly enter into low power mode. This way we can 394 * where the CPU may possibly enter into low power mode. This way we can
397 * notice an extended quiescent state to other CPUs that started a grace 395 * notice an extended quiescent state to other CPUs that started a grace
398 * period. Otherwise we would delay any grace period as long as we run in 396 * period. Otherwise we would delay any grace period as long as we run in
399 * the idle task. 397 * the idle task.
400 * 398 *
401 * Similarly, we avoid claiming an SRCU read lock held if the current 399 * Similarly, we avoid claiming an SRCU read lock held if the current
402 * CPU is offline. 400 * CPU is offline.
403 */ 401 */
404 #ifdef CONFIG_PREEMPT_COUNT 402 #ifdef CONFIG_PREEMPT_COUNT
405 static inline int rcu_read_lock_sched_held(void) 403 static inline int rcu_read_lock_sched_held(void)
406 { 404 {
407 int lockdep_opinion = 0; 405 int lockdep_opinion = 0;
408 406
409 if (!debug_lockdep_rcu_enabled()) 407 if (!debug_lockdep_rcu_enabled())
410 return 1; 408 return 1;
411 if (rcu_is_cpu_idle()) 409 if (rcu_is_cpu_idle())
412 return 0; 410 return 0;
413 if (!rcu_lockdep_current_cpu_online()) 411 if (!rcu_lockdep_current_cpu_online())
414 return 0; 412 return 0;
415 if (debug_locks) 413 if (debug_locks)
416 lockdep_opinion = lock_is_held(&rcu_sched_lock_map); 414 lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
417 return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); 415 return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
418 } 416 }
419 #else /* #ifdef CONFIG_PREEMPT_COUNT */ 417 #else /* #ifdef CONFIG_PREEMPT_COUNT */
420 static inline int rcu_read_lock_sched_held(void) 418 static inline int rcu_read_lock_sched_held(void)
421 { 419 {
422 return 1; 420 return 1;
423 } 421 }
424 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ 422 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
425 423
426 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 424 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
427 425
428 # define rcu_lock_acquire(a) do { } while (0) 426 # define rcu_lock_acquire(a) do { } while (0)
429 # define rcu_lock_release(a) do { } while (0) 427 # define rcu_lock_release(a) do { } while (0)
430 428
431 static inline int rcu_read_lock_held(void) 429 static inline int rcu_read_lock_held(void)
432 { 430 {
433 return 1; 431 return 1;
434 } 432 }
435 433
436 static inline int rcu_read_lock_bh_held(void) 434 static inline int rcu_read_lock_bh_held(void)
437 { 435 {
438 return 1; 436 return 1;
439 } 437 }
440 438
441 #ifdef CONFIG_PREEMPT_COUNT 439 #ifdef CONFIG_PREEMPT_COUNT
442 static inline int rcu_read_lock_sched_held(void) 440 static inline int rcu_read_lock_sched_held(void)
443 { 441 {
444 return preempt_count() != 0 || irqs_disabled(); 442 return preempt_count() != 0 || irqs_disabled();
445 } 443 }
446 #else /* #ifdef CONFIG_PREEMPT_COUNT */ 444 #else /* #ifdef CONFIG_PREEMPT_COUNT */
447 static inline int rcu_read_lock_sched_held(void) 445 static inline int rcu_read_lock_sched_held(void)
448 { 446 {
449 return 1; 447 return 1;
450 } 448 }
451 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */ 449 #endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
452 450
453 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 451 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
454 452
455 #ifdef CONFIG_PROVE_RCU 453 #ifdef CONFIG_PROVE_RCU
456 454
457 extern int rcu_my_thread_group_empty(void); 455 extern int rcu_my_thread_group_empty(void);
458 456
459 /** 457 /**
460 * rcu_lockdep_assert - emit lockdep splat if specified condition not met 458 * rcu_lockdep_assert - emit lockdep splat if specified condition not met
461 * @c: condition to check 459 * @c: condition to check
462 * @s: informative message 460 * @s: informative message
463 */ 461 */
464 #define rcu_lockdep_assert(c, s) \ 462 #define rcu_lockdep_assert(c, s) \
465 do { \ 463 do { \
466 static bool __section(.data.unlikely) __warned; \ 464 static bool __section(.data.unlikely) __warned; \
467 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ 465 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
468 __warned = true; \ 466 __warned = true; \
469 lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ 467 lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
470 } \ 468 } \
471 } while (0) 469 } while (0)
472 470
473 #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) 471 #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
474 static inline void rcu_preempt_sleep_check(void) 472 static inline void rcu_preempt_sleep_check(void)
475 { 473 {
476 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), 474 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
477 "Illegal context switch in RCU read-side critical section"); 475 "Illegal context switch in RCU read-side critical section");
478 } 476 }
479 #else /* #ifdef CONFIG_PROVE_RCU */ 477 #else /* #ifdef CONFIG_PROVE_RCU */
480 static inline void rcu_preempt_sleep_check(void) 478 static inline void rcu_preempt_sleep_check(void)
481 { 479 {
482 } 480 }
483 #endif /* #else #ifdef CONFIG_PROVE_RCU */ 481 #endif /* #else #ifdef CONFIG_PROVE_RCU */
484 482
485 #define rcu_sleep_check() \ 483 #define rcu_sleep_check() \
486 do { \ 484 do { \
487 rcu_preempt_sleep_check(); \ 485 rcu_preempt_sleep_check(); \
488 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ 486 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
489 "Illegal context switch in RCU-bh" \ 487 "Illegal context switch in RCU-bh" \
490 " read-side critical section"); \ 488 " read-side critical section"); \
491 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ 489 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \
492 "Illegal context switch in RCU-sched"\ 490 "Illegal context switch in RCU-sched"\
493 " read-side critical section"); \ 491 " read-side critical section"); \
494 } while (0) 492 } while (0)
495 493
496 #else /* #ifdef CONFIG_PROVE_RCU */ 494 #else /* #ifdef CONFIG_PROVE_RCU */
497 495
498 #define rcu_lockdep_assert(c, s) do { } while (0) 496 #define rcu_lockdep_assert(c, s) do { } while (0)
499 #define rcu_sleep_check() do { } while (0) 497 #define rcu_sleep_check() do { } while (0)
500 498
501 #endif /* #else #ifdef CONFIG_PROVE_RCU */ 499 #endif /* #else #ifdef CONFIG_PROVE_RCU */
502 500
503 /* 501 /*
504 * Helper functions for rcu_dereference_check(), rcu_dereference_protected() 502 * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
505 * and rcu_assign_pointer(). Some of these could be folded into their 503 * and rcu_assign_pointer(). Some of these could be folded into their
506 * callers, but they are left separate in order to ease introduction of 504 * callers, but they are left separate in order to ease introduction of
507 * multiple flavors of pointers to match the multiple flavors of RCU 505 * multiple flavors of pointers to match the multiple flavors of RCU
508 * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in 506 * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in
509 * the future. 507 * the future.
510 */ 508 */
511 509
512 #ifdef __CHECKER__ 510 #ifdef __CHECKER__
513 #define rcu_dereference_sparse(p, space) \ 511 #define rcu_dereference_sparse(p, space) \
514 ((void)(((typeof(*p) space *)p) == p)) 512 ((void)(((typeof(*p) space *)p) == p))
515 #else /* #ifdef __CHECKER__ */ 513 #else /* #ifdef __CHECKER__ */
516 #define rcu_dereference_sparse(p, space) 514 #define rcu_dereference_sparse(p, space)
517 #endif /* #else #ifdef __CHECKER__ */ 515 #endif /* #else #ifdef __CHECKER__ */
518 516
519 #define __rcu_access_pointer(p, space) \ 517 #define __rcu_access_pointer(p, space) \
520 ({ \ 518 ({ \
521 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ 519 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
522 rcu_dereference_sparse(p, space); \ 520 rcu_dereference_sparse(p, space); \
523 ((typeof(*p) __force __kernel *)(_________p1)); \ 521 ((typeof(*p) __force __kernel *)(_________p1)); \
524 }) 522 })
525 #define __rcu_dereference_check(p, c, space) \ 523 #define __rcu_dereference_check(p, c, space) \
526 ({ \ 524 ({ \
527 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ 525 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
528 rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ 526 rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
529 " usage"); \ 527 " usage"); \
530 rcu_dereference_sparse(p, space); \ 528 rcu_dereference_sparse(p, space); \
531 smp_read_barrier_depends(); \ 529 smp_read_barrier_depends(); \
532 ((typeof(*p) __force __kernel *)(_________p1)); \ 530 ((typeof(*p) __force __kernel *)(_________p1)); \
533 }) 531 })
534 #define __rcu_dereference_protected(p, c, space) \ 532 #define __rcu_dereference_protected(p, c, space) \
535 ({ \ 533 ({ \
536 rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ 534 rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
537 " usage"); \ 535 " usage"); \
538 rcu_dereference_sparse(p, space); \ 536 rcu_dereference_sparse(p, space); \
539 ((typeof(*p) __force __kernel *)(p)); \ 537 ((typeof(*p) __force __kernel *)(p)); \
540 }) 538 })
541 539
542 #define __rcu_access_index(p, space) \ 540 #define __rcu_access_index(p, space) \
543 ({ \ 541 ({ \
544 typeof(p) _________p1 = ACCESS_ONCE(p); \ 542 typeof(p) _________p1 = ACCESS_ONCE(p); \
545 rcu_dereference_sparse(p, space); \ 543 rcu_dereference_sparse(p, space); \
546 (_________p1); \ 544 (_________p1); \
547 }) 545 })
548 #define __rcu_dereference_index_check(p, c) \ 546 #define __rcu_dereference_index_check(p, c) \
549 ({ \ 547 ({ \
550 typeof(p) _________p1 = ACCESS_ONCE(p); \ 548 typeof(p) _________p1 = ACCESS_ONCE(p); \
551 rcu_lockdep_assert(c, \ 549 rcu_lockdep_assert(c, \
552 "suspicious rcu_dereference_index_check()" \ 550 "suspicious rcu_dereference_index_check()" \
553 " usage"); \ 551 " usage"); \
554 smp_read_barrier_depends(); \ 552 smp_read_barrier_depends(); \
555 (_________p1); \ 553 (_________p1); \
556 }) 554 })
557 #define __rcu_assign_pointer(p, v, space) \ 555 #define __rcu_assign_pointer(p, v, space) \
558 do { \ 556 do { \
559 smp_wmb(); \ 557 smp_wmb(); \
560 (p) = (typeof(*v) __force space *)(v); \ 558 (p) = (typeof(*v) __force space *)(v); \
561 } while (0) 559 } while (0)
562 560
563 561
564 /** 562 /**
565 * rcu_access_pointer() - fetch RCU pointer with no dereferencing 563 * rcu_access_pointer() - fetch RCU pointer with no dereferencing
566 * @p: The pointer to read 564 * @p: The pointer to read
567 * 565 *
568 * Return the value of the specified RCU-protected pointer, but omit the 566 * Return the value of the specified RCU-protected pointer, but omit the
569 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 567 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
570 * when the value of this pointer is accessed, but the pointer is not 568 * when the value of this pointer is accessed, but the pointer is not
571 * dereferenced, for example, when testing an RCU-protected pointer against 569 * dereferenced, for example, when testing an RCU-protected pointer against
572 * NULL. Although rcu_access_pointer() may also be used in cases where 570 * NULL. Although rcu_access_pointer() may also be used in cases where
573 * update-side locks prevent the value of the pointer from changing, you 571 * update-side locks prevent the value of the pointer from changing, you
574 * should instead use rcu_dereference_protected() for this use case. 572 * should instead use rcu_dereference_protected() for this use case.
575 * 573 *
576 * It is also permissible to use rcu_access_pointer() when read-side 574 * It is also permissible to use rcu_access_pointer() when read-side
577 * access to the pointer was removed at least one grace period ago, as 575 * access to the pointer was removed at least one grace period ago, as
578 * is the case in the context of the RCU callback that is freeing up 576 * is the case in the context of the RCU callback that is freeing up
579 * the data, or after a synchronize_rcu() returns. This can be useful 577 * the data, or after a synchronize_rcu() returns. This can be useful
580 * when tearing down multi-linked structures after a grace period 578 * when tearing down multi-linked structures after a grace period
581 * has elapsed. 579 * has elapsed.
582 */ 580 */
583 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) 581 #define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
584 582
585 /** 583 /**
586 * rcu_dereference_check() - rcu_dereference with debug checking 584 * rcu_dereference_check() - rcu_dereference with debug checking
587 * @p: The pointer to read, prior to dereferencing 585 * @p: The pointer to read, prior to dereferencing
588 * @c: The conditions under which the dereference will take place 586 * @c: The conditions under which the dereference will take place
589 * 587 *
590 * Do an rcu_dereference(), but check that the conditions under which the 588 * Do an rcu_dereference(), but check that the conditions under which the
591 * dereference will take place are correct. Typically the conditions 589 * dereference will take place are correct. Typically the conditions
592 * indicate the various locking conditions that should be held at that 590 * indicate the various locking conditions that should be held at that
593 * point. The check should return true if the conditions are satisfied. 591 * point. The check should return true if the conditions are satisfied.
594 * An implicit check for being in an RCU read-side critical section 592 * An implicit check for being in an RCU read-side critical section
595 * (rcu_read_lock()) is included. 593 * (rcu_read_lock()) is included.
596 * 594 *
597 * For example: 595 * For example:
598 * 596 *
599 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); 597 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
600 * 598 *
601 * could be used to indicate to lockdep that foo->bar may only be dereferenced 599 * could be used to indicate to lockdep that foo->bar may only be dereferenced
602 * if either rcu_read_lock() is held, or that the lock required to replace 600 * if either rcu_read_lock() is held, or that the lock required to replace
603 * the bar struct at foo->bar is held. 601 * the bar struct at foo->bar is held.
604 * 602 *
605 * Note that the list of conditions may also include indications of when a lock 603 * Note that the list of conditions may also include indications of when a lock
606 * need not be held, for example during initialisation or destruction of the 604 * need not be held, for example during initialisation or destruction of the
607 * target struct: 605 * target struct:
608 * 606 *
609 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || 607 * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
610 * atomic_read(&foo->usage) == 0); 608 * atomic_read(&foo->usage) == 0);
611 * 609 *
612 * Inserts memory barriers on architectures that require them 610 * Inserts memory barriers on architectures that require them
613 * (currently only the Alpha), prevents the compiler from refetching 611 * (currently only the Alpha), prevents the compiler from refetching
614 * (and from merging fetches), and, more importantly, documents exactly 612 * (and from merging fetches), and, more importantly, documents exactly
615 * which pointers are protected by RCU and checks that the pointer is 613 * which pointers are protected by RCU and checks that the pointer is
616 * annotated as __rcu. 614 * annotated as __rcu.
617 */ 615 */
618 #define rcu_dereference_check(p, c) \ 616 #define rcu_dereference_check(p, c) \
619 __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) 617 __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
620 618
621 /** 619 /**
622 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking 620 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
623 * @p: The pointer to read, prior to dereferencing 621 * @p: The pointer to read, prior to dereferencing
624 * @c: The conditions under which the dereference will take place 622 * @c: The conditions under which the dereference will take place
625 * 623 *
626 * This is the RCU-bh counterpart to rcu_dereference_check(). 624 * This is the RCU-bh counterpart to rcu_dereference_check().
627 */ 625 */
628 #define rcu_dereference_bh_check(p, c) \ 626 #define rcu_dereference_bh_check(p, c) \
629 __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) 627 __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
630 628
631 /** 629 /**
632 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking 630 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
633 * @p: The pointer to read, prior to dereferencing 631 * @p: The pointer to read, prior to dereferencing
634 * @c: The conditions under which the dereference will take place 632 * @c: The conditions under which the dereference will take place
635 * 633 *
636 * This is the RCU-sched counterpart to rcu_dereference_check(). 634 * This is the RCU-sched counterpart to rcu_dereference_check().
637 */ 635 */
638 #define rcu_dereference_sched_check(p, c) \ 636 #define rcu_dereference_sched_check(p, c) \
639 __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ 637 __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
640 __rcu) 638 __rcu)
641 639
642 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ 640 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
643 641
644 /* 642 /*
645 * The tracing infrastructure traces RCU (we want that), but unfortunately 643 * The tracing infrastructure traces RCU (we want that), but unfortunately
646 * some of the RCU checks causes tracing to lock up the system. 644 * some of the RCU checks causes tracing to lock up the system.
647 * 645 *
648 * The tracing version of rcu_dereference_raw() must not call 646 * The tracing version of rcu_dereference_raw() must not call
649 * rcu_read_lock_held(). 647 * rcu_read_lock_held().
650 */ 648 */
651 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) 649 #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
652 650
653 /** 651 /**
654 * rcu_access_index() - fetch RCU index with no dereferencing 652 * rcu_access_index() - fetch RCU index with no dereferencing
655 * @p: The index to read 653 * @p: The index to read
656 * 654 *
657 * Return the value of the specified RCU-protected index, but omit the 655 * Return the value of the specified RCU-protected index, but omit the
658 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful 656 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
659 * when the value of this index is accessed, but the index is not 657 * when the value of this index is accessed, but the index is not
660 * dereferenced, for example, when testing an RCU-protected index against 658 * dereferenced, for example, when testing an RCU-protected index against
661 * -1. Although rcu_access_index() may also be used in cases where 659 * -1. Although rcu_access_index() may also be used in cases where
662 * update-side locks prevent the value of the index from changing, you 660 * update-side locks prevent the value of the index from changing, you
663 * should instead use rcu_dereference_index_protected() for this use case. 661 * should instead use rcu_dereference_index_protected() for this use case.
664 */ 662 */
665 #define rcu_access_index(p) __rcu_access_index((p), __rcu) 663 #define rcu_access_index(p) __rcu_access_index((p), __rcu)
666 664
667 /** 665 /**
668 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking 666 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
669 * @p: The pointer to read, prior to dereferencing 667 * @p: The pointer to read, prior to dereferencing
670 * @c: The conditions under which the dereference will take place 668 * @c: The conditions under which the dereference will take place
671 * 669 *
672 * Similar to rcu_dereference_check(), but omits the sparse checking. 670 * Similar to rcu_dereference_check(), but omits the sparse checking.
673 * This allows rcu_dereference_index_check() to be used on integers, 671 * This allows rcu_dereference_index_check() to be used on integers,
674 * which can then be used as array indices. Attempting to use 672 * which can then be used as array indices. Attempting to use
675 * rcu_dereference_check() on an integer will give compiler warnings 673 * rcu_dereference_check() on an integer will give compiler warnings
676 * because the sparse address-space mechanism relies on dereferencing 674 * because the sparse address-space mechanism relies on dereferencing
677 * the RCU-protected pointer. Dereferencing integers is not something 675 * the RCU-protected pointer. Dereferencing integers is not something
678 * that even gcc will put up with. 676 * that even gcc will put up with.
679 * 677 *
680 * Note that this function does not implicitly check for RCU read-side 678 * Note that this function does not implicitly check for RCU read-side
681 * critical sections. If this function gains lots of uses, it might 679 * critical sections. If this function gains lots of uses, it might
682 * make sense to provide versions for each flavor of RCU, but it does 680 * make sense to provide versions for each flavor of RCU, but it does
683 * not make sense as of early 2010. 681 * not make sense as of early 2010.
684 */ 682 */
685 #define rcu_dereference_index_check(p, c) \ 683 #define rcu_dereference_index_check(p, c) \
686 __rcu_dereference_index_check((p), (c)) 684 __rcu_dereference_index_check((p), (c))
687 685
688 /** 686 /**
689 * rcu_dereference_protected() - fetch RCU pointer when updates prevented 687 * rcu_dereference_protected() - fetch RCU pointer when updates prevented
690 * @p: The pointer to read, prior to dereferencing 688 * @p: The pointer to read, prior to dereferencing
691 * @c: The conditions under which the dereference will take place 689 * @c: The conditions under which the dereference will take place
692 * 690 *
693 * Return the value of the specified RCU-protected pointer, but omit 691 * Return the value of the specified RCU-protected pointer, but omit
694 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This 692 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
695 * is useful in cases where update-side locks prevent the value of the 693 * is useful in cases where update-side locks prevent the value of the
696 * pointer from changing. Please note that this primitive does -not- 694 * pointer from changing. Please note that this primitive does -not-
697 * prevent the compiler from repeating this reference or combining it 695 * prevent the compiler from repeating this reference or combining it
698 * with other references, so it should not be used without protection 696 * with other references, so it should not be used without protection
699 * of appropriate locks. 697 * of appropriate locks.
700 * 698 *
701 * This function is only for update-side use. Using this function 699 * This function is only for update-side use. Using this function
702 * when protected only by rcu_read_lock() will result in infrequent 700 * when protected only by rcu_read_lock() will result in infrequent
703 * but very ugly failures. 701 * but very ugly failures.
704 */ 702 */
705 #define rcu_dereference_protected(p, c) \ 703 #define rcu_dereference_protected(p, c) \
706 __rcu_dereference_protected((p), (c), __rcu) 704 __rcu_dereference_protected((p), (c), __rcu)
707 705
708 706
709 /** 707 /**
710 * rcu_dereference() - fetch RCU-protected pointer for dereferencing 708 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
711 * @p: The pointer to read, prior to dereferencing 709 * @p: The pointer to read, prior to dereferencing
712 * 710 *
713 * This is a simple wrapper around rcu_dereference_check(). 711 * This is a simple wrapper around rcu_dereference_check().
714 */ 712 */
715 #define rcu_dereference(p) rcu_dereference_check(p, 0) 713 #define rcu_dereference(p) rcu_dereference_check(p, 0)
716 714
717 /** 715 /**
718 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing 716 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
719 * @p: The pointer to read, prior to dereferencing 717 * @p: The pointer to read, prior to dereferencing
720 * 718 *
721 * Makes rcu_dereference_check() do the dirty work. 719 * Makes rcu_dereference_check() do the dirty work.
722 */ 720 */
723 #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) 721 #define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
724 722
725 /** 723 /**
726 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing 724 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
727 * @p: The pointer to read, prior to dereferencing 725 * @p: The pointer to read, prior to dereferencing
728 * 726 *
729 * Makes rcu_dereference_check() do the dirty work. 727 * Makes rcu_dereference_check() do the dirty work.
730 */ 728 */
731 #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) 729 #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
732 730
733 /** 731 /**
734 * rcu_read_lock() - mark the beginning of an RCU read-side critical section 732 * rcu_read_lock() - mark the beginning of an RCU read-side critical section
735 * 733 *
736 * When synchronize_rcu() is invoked on one CPU while other CPUs 734 * When synchronize_rcu() is invoked on one CPU while other CPUs
737 * are within RCU read-side critical sections, then the 735 * are within RCU read-side critical sections, then the
738 * synchronize_rcu() is guaranteed to block until after all the other 736 * synchronize_rcu() is guaranteed to block until after all the other
739 * CPUs exit their critical sections. Similarly, if call_rcu() is invoked 737 * CPUs exit their critical sections. Similarly, if call_rcu() is invoked
740 * on one CPU while other CPUs are within RCU read-side critical 738 * on one CPU while other CPUs are within RCU read-side critical
741 * sections, invocation of the corresponding RCU callback is deferred 739 * sections, invocation of the corresponding RCU callback is deferred
742 * until after all the other CPUs exit their critical sections. 740 * until after all the other CPUs exit their critical sections.
743 * 741 *
744 * Note, however, that RCU callbacks are permitted to run concurrently 742 * Note, however, that RCU callbacks are permitted to run concurrently
745 * with new RCU read-side critical sections. One way that this can happen 743 * with new RCU read-side critical sections. One way that this can happen
746 * is via the following sequence of events: (1) CPU 0 enters an RCU 744 * is via the following sequence of events: (1) CPU 0 enters an RCU
747 * read-side critical section, (2) CPU 1 invokes call_rcu() to register 745 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
748 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, 746 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
749 * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU 747 * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
750 * callback is invoked. This is legal, because the RCU read-side critical 748 * callback is invoked. This is legal, because the RCU read-side critical
751 * section that was running concurrently with the call_rcu() (and which 749 * section that was running concurrently with the call_rcu() (and which
752 * therefore might be referencing something that the corresponding RCU 750 * therefore might be referencing something that the corresponding RCU
753 * callback would free up) has completed before the corresponding 751 * callback would free up) has completed before the corresponding
754 * RCU callback is invoked. 752 * RCU callback is invoked.
755 * 753 *
756 * RCU read-side critical sections may be nested. Any deferred actions 754 * RCU read-side critical sections may be nested. Any deferred actions
757 * will be deferred until the outermost RCU read-side critical section 755 * will be deferred until the outermost RCU read-side critical section
758 * completes. 756 * completes.
759 * 757 *
760 * You can avoid reading and understanding the next paragraph by 758 * You can avoid reading and understanding the next paragraph by
761 * following this rule: don't put anything in an rcu_read_lock() RCU 759 * following this rule: don't put anything in an rcu_read_lock() RCU
762 * read-side critical section that would block in a !PREEMPT kernel. 760 * read-side critical section that would block in a !PREEMPT kernel.
763 * But if you want the full story, read on! 761 * But if you want the full story, read on!
764 * 762 *
765 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it 763 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
766 * is illegal to block while in an RCU read-side critical section. In 764 * is illegal to block while in an RCU read-side critical section. In
767 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) 765 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
768 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may 766 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
769 * be preempted, but explicit blocking is illegal. Finally, in preemptible 767 * be preempted, but explicit blocking is illegal. Finally, in preemptible
770 * RCU implementations in real-time (with -rt patchset) kernel builds, 768 * RCU implementations in real-time (with -rt patchset) kernel builds,
771 * RCU read-side critical sections may be preempted and they may also 769 * RCU read-side critical sections may be preempted and they may also
772 * block, but only when acquiring spinlocks that are subject to priority 770 * block, but only when acquiring spinlocks that are subject to priority
773 * inheritance. 771 * inheritance.
774 */ 772 */
775 static inline void rcu_read_lock(void) 773 static inline void rcu_read_lock(void)
776 { 774 {
777 __rcu_read_lock(); 775 __rcu_read_lock();
778 __acquire(RCU); 776 __acquire(RCU);
779 rcu_lock_acquire(&rcu_lock_map); 777 rcu_lock_acquire(&rcu_lock_map);
780 rcu_lockdep_assert(!rcu_is_cpu_idle(), 778 rcu_lockdep_assert(!rcu_is_cpu_idle(),
781 "rcu_read_lock() used illegally while idle"); 779 "rcu_read_lock() used illegally while idle");
782 } 780 }
783 781
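A tiny sketch of the nesting rule above (illustrative only); any deferred free of the hypothetical "gp" target cannot run until the outermost rcu_read_unlock().

static void foo_nested_reader(void)
{
	rcu_read_lock();
	rcu_read_lock();		/* Nesting is legal. */
	/* ... access rcu_dereference(gp) here ... */
	rcu_read_unlock();
	/* Still inside the outermost critical section. */
	rcu_read_unlock();		/* Deferred callbacks may now run. */
}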
784 /* 782 /*
785 * So where is rcu_write_lock()? It does not exist, as there is no 783 * So where is rcu_write_lock()? It does not exist, as there is no
786 * way for writers to lock out RCU readers. This is a feature, not 784 * way for writers to lock out RCU readers. This is a feature, not
787 * a bug -- this property is what provides RCU's performance benefits. 785 * a bug -- this property is what provides RCU's performance benefits.
788 * Of course, writers must coordinate with each other. The normal 786 * Of course, writers must coordinate with each other. The normal
789 * spinlock primitives work well for this, but any other technique may be 787 * spinlock primitives work well for this, but any other technique may be
790 * used as well. RCU does not care how the writers keep out of each 788 * used as well. RCU does not care how the writers keep out of each
791 * others' way, as long as they do so. 789 * others' way, as long as they do so.
792 */ 790 */
793 791
794 /** 792 /**
795 * rcu_read_unlock() - marks the end of an RCU read-side critical section. 793 * rcu_read_unlock() - marks the end of an RCU read-side critical section.
796 * 794 *
797 * See rcu_read_lock() for more information. 795 * See rcu_read_lock() for more information.
798 */ 796 */
799 static inline void rcu_read_unlock(void) 797 static inline void rcu_read_unlock(void)
800 { 798 {
801 rcu_lockdep_assert(!rcu_is_cpu_idle(), 799 rcu_lockdep_assert(!rcu_is_cpu_idle(),
802 "rcu_read_unlock() used illegally while idle"); 800 "rcu_read_unlock() used illegally while idle");
803 rcu_lock_release(&rcu_lock_map); 801 rcu_lock_release(&rcu_lock_map);
804 __release(RCU); 802 __release(RCU);
805 __rcu_read_unlock(); 803 __rcu_read_unlock();
806 } 804 }
807 805
808 /** 806 /**
809 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section 807 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
810 * 808 *
811 * This is the equivalent of rcu_read_lock(), but to be used when updates 809 * This is the equivalent of rcu_read_lock(), but to be used when updates
812 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since 810 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
813 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a 811 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
814 * softirq handler to be a quiescent state, a process in an RCU read-side 812 * softirq handler to be a quiescent state, a process in an RCU read-side
815 * critical section must be protected by disabling softirqs. Read-side 813 * critical section must be protected by disabling softirqs. Read-side
816 * critical sections in interrupt context can use just rcu_read_lock(), 814 * critical sections in interrupt context can use just rcu_read_lock(),
817 * though this should at least be commented to avoid confusing people 815 * though this should at least be commented to avoid confusing people
818 * reading the code. 816 * reading the code.
819 * 817 *
820 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh() 818 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
821 * must occur in the same context, for example, it is illegal to invoke 819 * must occur in the same context, for example, it is illegal to invoke
822 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh() 820 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
823 * was invoked from some other task. 821 * was invoked from some other task.
824 */ 822 */
825 static inline void rcu_read_lock_bh(void) 823 static inline void rcu_read_lock_bh(void)
826 { 824 {
827 local_bh_disable(); 825 local_bh_disable();
828 __acquire(RCU_BH); 826 __acquire(RCU_BH);
829 rcu_lock_acquire(&rcu_bh_lock_map); 827 rcu_lock_acquire(&rcu_bh_lock_map);
830 rcu_lockdep_assert(!rcu_is_cpu_idle(), 828 rcu_lockdep_assert(!rcu_is_cpu_idle(),
831 "rcu_read_lock_bh() used illegally while idle"); 829 "rcu_read_lock_bh() used illegally while idle");
832 } 830 }
833 831
834 /* 832 /*
835 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section 833 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
836 * 834 *
837 * See rcu_read_lock_bh() for more information. 835 * See rcu_read_lock_bh() for more information.
838 */ 836 */
839 static inline void rcu_read_unlock_bh(void) 837 static inline void rcu_read_unlock_bh(void)
840 { 838 {
841 rcu_lockdep_assert(!rcu_is_cpu_idle(), 839 rcu_lockdep_assert(!rcu_is_cpu_idle(),
842 "rcu_read_unlock_bh() used illegally while idle"); 840 "rcu_read_unlock_bh() used illegally while idle");
843 rcu_lock_release(&rcu_bh_lock_map); 841 rcu_lock_release(&rcu_bh_lock_map);
844 __release(RCU_BH); 842 __release(RCU_BH);
845 local_bh_enable(); 843 local_bh_enable();
846 } 844 }
847 845
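Illustrative only, assuming a hypothetical "gp_bh" pointer whose updater uses call_rcu_bh() or synchronize_rcu_bh(); "struct foo" is reused from the earlier sketch.

static struct foo __rcu *gp_bh;

static int foo_read_bh(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock_bh();		/* Disables softirqs for the duration. */
	p = rcu_dereference_bh(gp_bh);
	if (p)
		a = p->a;
	rcu_read_unlock_bh();
	return a;
}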
848 /** 846 /**
849 * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section 847 * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section
850 * 848 *
851 * This is the equivalent of rcu_read_lock(), but to be used when updates 849 * This is the equivalent of rcu_read_lock(), but to be used when updates
852 * are being done using call_rcu_sched() or synchronize_rcu_sched(). 850 * are being done using call_rcu_sched() or synchronize_rcu_sched().
853 * Read-side critical sections can also be introduced by anything that 851 * Read-side critical sections can also be introduced by anything that
854 * disables preemption, including local_irq_disable() and friends. 852 * disables preemption, including local_irq_disable() and friends.
855 * 853 *
856 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched() 854 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
857 * must occur in the same context, for example, it is illegal to invoke 855 * must occur in the same context, for example, it is illegal to invoke
858 * rcu_read_unlock_sched() from process context if the matching 856 * rcu_read_unlock_sched() from process context if the matching
859 * rcu_read_lock_sched() was invoked from an NMI handler. 857 * rcu_read_lock_sched() was invoked from an NMI handler.
860 */ 858 */
861 static inline void rcu_read_lock_sched(void) 859 static inline void rcu_read_lock_sched(void)
862 { 860 {
863 preempt_disable(); 861 preempt_disable();
864 __acquire(RCU_SCHED); 862 __acquire(RCU_SCHED);
865 rcu_lock_acquire(&rcu_sched_lock_map); 863 rcu_lock_acquire(&rcu_sched_lock_map);
866 rcu_lockdep_assert(!rcu_is_cpu_idle(), 864 rcu_lockdep_assert(!rcu_is_cpu_idle(),
867 "rcu_read_lock_sched() used illegally while idle"); 865 "rcu_read_lock_sched() used illegally while idle");
868 } 866 }
869 867
870 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ 868 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
871 static inline notrace void rcu_read_lock_sched_notrace(void) 869 static inline notrace void rcu_read_lock_sched_notrace(void)
872 { 870 {
873 preempt_disable_notrace(); 871 preempt_disable_notrace();
874 __acquire(RCU_SCHED); 872 __acquire(RCU_SCHED);
875 } 873 }
876 874
877 /* 875 /*
878 * rcu_read_unlock_sched - marks the end of an RCU-sched critical section 876 * rcu_read_unlock_sched - marks the end of an RCU-sched critical section
879 * 877 *
880 * See rcu_read_lock_sched for more information. 878 * See rcu_read_lock_sched for more information.
881 */ 879 */
882 static inline void rcu_read_unlock_sched(void) 880 static inline void rcu_read_unlock_sched(void)
883 { 881 {
884 rcu_lockdep_assert(!rcu_is_cpu_idle(), 882 rcu_lockdep_assert(!rcu_is_cpu_idle(),
885 "rcu_read_unlock_sched() used illegally while idle"); 883 "rcu_read_unlock_sched() used illegally while idle");
886 rcu_lock_release(&rcu_sched_lock_map); 884 rcu_lock_release(&rcu_sched_lock_map);
887 __release(RCU_SCHED); 885 __release(RCU_SCHED);
888 preempt_enable(); 886 preempt_enable();
889 } 887 }
890 888
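Illustrative only, assuming a hypothetical "gp_sched" pointer whose updater uses call_rcu_sched() or synchronize_sched(); "struct foo" is again reused from the earlier sketch.

static struct foo __rcu *gp_sched;

static int foo_read_sched(void)
{
	struct foo *p;
	int a = -1;

	rcu_read_lock_sched();		/* Disables preemption for the duration. */
	p = rcu_dereference_sched(gp_sched);
	if (p)
		a = p->a;
	rcu_read_unlock_sched();
	return a;
}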
891 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */ 889 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
892 static inline notrace void rcu_read_unlock_sched_notrace(void) 890 static inline notrace void rcu_read_unlock_sched_notrace(void)
893 { 891 {
894 __release(RCU_SCHED); 892 __release(RCU_SCHED);
895 preempt_enable_notrace(); 893 preempt_enable_notrace();
896 } 894 }
897 895
898 /** 896 /**
899 * rcu_assign_pointer() - assign to RCU-protected pointer 897 * rcu_assign_pointer() - assign to RCU-protected pointer
900 * @p: pointer to assign to 898 * @p: pointer to assign to
901 * @v: value to assign (publish) 899 * @v: value to assign (publish)
902 * 900 *
903 * Assigns the specified value to the specified RCU-protected 901 * Assigns the specified value to the specified RCU-protected
904 * pointer, ensuring that any concurrent RCU readers will see 902 * pointer, ensuring that any concurrent RCU readers will see
905 * any prior initialization. 903 * any prior initialization.
906 * 904 *
907 * Inserts memory barriers on architectures that require them 905 * Inserts memory barriers on architectures that require them
908 * (which is most of them), and also prevents the compiler from 906 * (which is most of them), and also prevents the compiler from
909 * reordering the code that initializes the structure after the pointer 907 * reordering the code that initializes the structure after the pointer
910 * assignment. More importantly, this call documents which pointers 908 * assignment. More importantly, this call documents which pointers
911 * will be dereferenced by RCU read-side code. 909 * will be dereferenced by RCU read-side code.
912 * 910 *
913 * In some special cases, you may use RCU_INIT_POINTER() instead 911 * In some special cases, you may use RCU_INIT_POINTER() instead
914 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due 912 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
915 * to the fact that it does not constrain either the CPU or the compiler. 913 * to the fact that it does not constrain either the CPU or the compiler.
916 * That said, using RCU_INIT_POINTER() when you should have used 914 * That said, using RCU_INIT_POINTER() when you should have used
917 * rcu_assign_pointer() is a very bad thing that results in 915 * rcu_assign_pointer() is a very bad thing that results in
918 * impossible-to-diagnose memory corruption. So please be careful. 916 * impossible-to-diagnose memory corruption. So please be careful.
919 * See the RCU_INIT_POINTER() comment header for details. 917 * See the RCU_INIT_POINTER() comment header for details.
920 */ 918 */
921 #define rcu_assign_pointer(p, v) \ 919 #define rcu_assign_pointer(p, v) \
922 __rcu_assign_pointer((p), (v), __rcu) 920 __rcu_assign_pointer((p), (v), __rcu)
923 921
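A minimal sketch of the initialize-then-publish ordering described above (illustrative only; assumes the hypothetical "gp" was NULL and that updater exclusion is handled elsewhere).

static int foo_publish(int a)
{
	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return -ENOMEM;
	p->a = a;			/* Fully initialize first... */
	rcu_assign_pointer(gp, p);	/* ...then publish; readers see an initialized foo. */
	return 0;
}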
924 /** 922 /**
925 * RCU_INIT_POINTER() - initialize an RCU protected pointer 923 * RCU_INIT_POINTER() - initialize an RCU protected pointer
926 * 924 *
927 * Initialize an RCU-protected pointer in special cases where readers 925 * Initialize an RCU-protected pointer in special cases where readers
928 * do not need ordering constraints on the CPU or the compiler. These 926 * do not need ordering constraints on the CPU or the compiler. These
929 * special cases are: 927 * special cases are:
930 * 928 *
931 * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- 929 * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
932 * 2. The caller has taken whatever steps are required to prevent 930 * 2. The caller has taken whatever steps are required to prevent
933 * RCU readers from concurrently accessing this pointer -or- 931 * RCU readers from concurrently accessing this pointer -or-
934 * 3. The referenced data structure has already been exposed to 932 * 3. The referenced data structure has already been exposed to
935 * readers either at compile time or via rcu_assign_pointer() -and- 933 * readers either at compile time or via rcu_assign_pointer() -and-
936 * a. You have not made -any- reader-visible changes to 934 * a. You have not made -any- reader-visible changes to
937 * this structure since then -or- 935 * this structure since then -or-
938 * b. It is OK for readers accessing this structure from its 936 * b. It is OK for readers accessing this structure from its
939 * new location to see the old state of the structure. (For 937 * new location to see the old state of the structure. (For
940 * example, the changes were to statistical counters or to 938 * example, the changes were to statistical counters or to
941 * other state where exact synchronization is not required.) 939 * other state where exact synchronization is not required.)
942 * 940 *
943 * Failure to follow these rules governing use of RCU_INIT_POINTER() will 941 * Failure to follow these rules governing use of RCU_INIT_POINTER() will
944 * result in impossible-to-diagnose memory corruption. That is, the structures 942 * result in impossible-to-diagnose memory corruption. That is, the structures
945 * will look OK in crash dumps, but any concurrent RCU readers might 943 * will look OK in crash dumps, but any concurrent RCU readers might
946 * see pre-initialized values of the referenced data structure. So 944 * see pre-initialized values of the referenced data structure. So
947 * please be very careful how you use RCU_INIT_POINTER()!!! 945 * please be very careful how you use RCU_INIT_POINTER()!!!
948 * 946 *
949 * If you are creating an RCU-protected linked structure that is accessed 947 * If you are creating an RCU-protected linked structure that is accessed
950 * by a single external-to-structure RCU-protected pointer, then you may 948 * by a single external-to-structure RCU-protected pointer, then you may
951 * use RCU_INIT_POINTER() to initialize the internal RCU-protected 949 * use RCU_INIT_POINTER() to initialize the internal RCU-protected
952 * pointers, but you must use rcu_assign_pointer() to initialize the 950 * pointers, but you must use rcu_assign_pointer() to initialize the
953 * external-to-structure pointer -after- you have completely initialized 951 * external-to-structure pointer -after- you have completely initialized
954 * the reader-accessible portions of the linked structure. 952 * the reader-accessible portions of the linked structure.
955 */ 953 */
956 #define RCU_INIT_POINTER(p, v) \ 954 #define RCU_INIT_POINTER(p, v) \
957 do { \ 955 do { \
958 p = (typeof(*v) __force __rcu *)(v); \ 956 p = (typeof(*v) __force __rcu *)(v); \
959 } while (0) 957 } while (0)
960 958
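A sketch of special case 1 above (NULLing the pointer), again using the hypothetical "gp", "foo_lock" and "struct foo" from the earlier sketches.

static void foo_remove(void)
{
	struct foo *old;

	spin_lock(&foo_lock);
	old = rcu_dereference_protected(gp, lockdep_is_held(&foo_lock));
	RCU_INIT_POINTER(gp, NULL);	/* NULLing needs no ordering, so case 1 applies. */
	spin_unlock(&foo_lock);
	if (old)
		kfree_rcu(old, rcu);
}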
961 /** 959 /**
962 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer 960 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
963 * 961 *
964 * GCC-style initialization for an RCU-protected pointer in a structure field. 962 * GCC-style initialization for an RCU-protected pointer in a structure field.
965 */ 963 */
966 #define RCU_POINTER_INITIALIZER(p, v) \ 964 #define RCU_POINTER_INITIALIZER(p, v) \
967 .p = (typeof(*v) __force __rcu *)(v) 965 .p = (typeof(*v) __force __rcu *)(v)
968 966
969 /* 967 /*
970 * Does the specified offset indicate that the corresponding rcu_head 968 * Does the specified offset indicate that the corresponding rcu_head
971 * structure can be handled by kfree_rcu()? 969 * structure can be handled by kfree_rcu()?
972 */ 970 */
973 #define __is_kfree_rcu_offset(offset) ((offset) < 4096) 971 #define __is_kfree_rcu_offset(offset) ((offset) < 4096)
974 972
975 /* 973 /*
976 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain. 974 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
977 */ 975 */
978 #define __kfree_rcu(head, offset) \ 976 #define __kfree_rcu(head, offset) \
979 do { \ 977 do { \
980 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ 978 BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
981 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ 979 kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
982 } while (0) 980 } while (0)
983 981
984 /** 982 /**
985 * kfree_rcu() - kfree an object after a grace period. 983 * kfree_rcu() - kfree an object after a grace period.
986 * @ptr: pointer to kfree 984 * @ptr: pointer to kfree
987 * @rcu_head: the name of the struct rcu_head within the type of @ptr. 985 * @rcu_head: the name of the struct rcu_head within the type of @ptr.
988 * 986 *
989 * Many rcu callback functions just call kfree() on the base structure. 987 * Many rcu callback functions just call kfree() on the base structure.
990 * These functions are trivial, but their size adds up, and furthermore 988 * These functions are trivial, but their size adds up, and furthermore
991 * when they are used in a kernel module, that module must invoke the 989 * when they are used in a kernel module, that module must invoke the
992 * high-latency rcu_barrier() function at module-unload time. 990 * high-latency rcu_barrier() function at module-unload time.
993 * 991 *
994 * The kfree_rcu() function handles this issue. Rather than encoding a 992 * The kfree_rcu() function handles this issue. Rather than encoding a
995 * function address in the embedded rcu_head structure, kfree_rcu() instead 993 * function address in the embedded rcu_head structure, kfree_rcu() instead
996 * encodes the offset of the rcu_head structure within the base structure. 994 * encodes the offset of the rcu_head structure within the base structure.
997 * Because the functions are not allowed in the low-order 4096 bytes of 995 * Because the functions are not allowed in the low-order 4096 bytes of
998 * kernel virtual memory, offsets up to 4095 bytes can be accommodated. 996 * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
999 * If the offset is larger than 4095 bytes, a compile-time error will 997 * If the offset is larger than 4095 bytes, a compile-time error will
1000 * be generated in __kfree_rcu(). If this error is triggered, you can 998 * be generated in __kfree_rcu(). If this error is triggered, you can
1001 * either fall back to use of call_rcu() or rearrange the structure to 999 * either fall back to use of call_rcu() or rearrange the structure to
1002 * position the rcu_head structure into the first 4096 bytes. 1000 * position the rcu_head structure into the first 4096 bytes.
1003 * 1001 *
1004 * Note that the allowable offset might decrease in the future, for example, 1002 * Note that the allowable offset might decrease in the future, for example,
1005 * to allow something like kmem_cache_free_rcu(). 1003 * to allow something like kmem_cache_free_rcu().
1006 * 1004 *
1007 * The BUILD_BUG_ON check must not involve any function calls, hence the 1005 * The BUILD_BUG_ON check must not involve any function calls, hence the
1008 * checks are done in macros here. 1006 * checks are done in macros here.
1009 */ 1007 */
1010 #define kfree_rcu(ptr, rcu_head) \ 1008 #define kfree_rcu(ptr, rcu_head) \
1011 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 1009 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
1012 1010
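Illustrative only: a hypothetical "struct bar" freed via kfree_rcu(); the second argument names the rcu_head field, whose offset must stay below 4096 bytes as explained above.

struct bar {
	int data;
	struct rcu_head rcu;		/* Offset well below the 4096-byte limit. */
};

static void bar_free(struct bar *p)
{
	kfree_rcu(p, rcu);		/* No separate callback function needed. */
}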
1013 #ifdef CONFIG_RCU_NOCB_CPU 1011 #ifdef CONFIG_RCU_NOCB_CPU
1014 extern bool rcu_is_nocb_cpu(int cpu); 1012 extern bool rcu_is_nocb_cpu(int cpu);
1015 #else 1013 #else
1016 static inline bool rcu_is_nocb_cpu(int cpu) { return false; } 1014 static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
1017 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 1015 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
1018 1016
1019 1017
1020 #endif /* __LINUX_RCUPDATE_H */ 1018 #endif /* __LINUX_RCUPDATE_H */
1021 1019
include/linux/rcutiny.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. 2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 * 21 *
22 * For detailed explanation of Read-Copy Update mechanism see - 22 * For detailed explanation of Read-Copy Update mechanism see -
23 * Documentation/RCU 23 * Documentation/RCU
24 */ 24 */
25 #ifndef __LINUX_TINY_H 25 #ifndef __LINUX_TINY_H
26 #define __LINUX_TINY_H 26 #define __LINUX_TINY_H
27 27
28 #include <linux/cache.h> 28 #include <linux/cache.h>
29 29
30 static inline void rcu_barrier_bh(void) 30 static inline void rcu_barrier_bh(void)
31 { 31 {
32 wait_rcu_gp(call_rcu_bh); 32 wait_rcu_gp(call_rcu_bh);
33 } 33 }
34 34
35 static inline void rcu_barrier_sched(void) 35 static inline void rcu_barrier_sched(void)
36 { 36 {
37 wait_rcu_gp(call_rcu_sched); 37 wait_rcu_gp(call_rcu_sched);
38 } 38 }
39 39
40 static inline void synchronize_rcu_expedited(void) 40 static inline void synchronize_rcu_expedited(void)
41 { 41 {
42 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ 42 synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
43 } 43 }
44 44
45 static inline void rcu_barrier(void) 45 static inline void rcu_barrier(void)
46 { 46 {
47 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ 47 rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
48 } 48 }
49 49
50 static inline void synchronize_rcu_bh(void) 50 static inline void synchronize_rcu_bh(void)
51 { 51 {
52 synchronize_sched(); 52 synchronize_sched();
53 } 53 }
54 54
55 static inline void synchronize_rcu_bh_expedited(void) 55 static inline void synchronize_rcu_bh_expedited(void)
56 { 56 {
57 synchronize_sched(); 57 synchronize_sched();
58 } 58 }
59 59
60 static inline void synchronize_sched_expedited(void) 60 static inline void synchronize_sched_expedited(void)
61 { 61 {
62 synchronize_sched(); 62 synchronize_sched();
63 } 63 }
64 64
65 static inline void kfree_call_rcu(struct rcu_head *head, 65 static inline void kfree_call_rcu(struct rcu_head *head,
66 void (*func)(struct rcu_head *rcu)) 66 void (*func)(struct rcu_head *rcu))
67 { 67 {
68 call_rcu(head, func); 68 call_rcu(head, func);
69 } 69 }
70 70
71 static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 71 static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
72 { 72 {
73 *delta_jiffies = ULONG_MAX; 73 *delta_jiffies = ULONG_MAX;
74 return 0; 74 return 0;
75 } 75 }
76 76
77 static inline void rcu_note_context_switch(int cpu) 77 static inline void rcu_note_context_switch(int cpu)
78 { 78 {
79 rcu_sched_qs(cpu); 79 rcu_sched_qs(cpu);
80 } 80 }
81 81
82 /* 82 /*
83 * Take advantage of the fact that there is only one CPU, which 83 * Take advantage of the fact that there is only one CPU, which
84 * allows us to ignore virtualization-based context switches. 84 * allows us to ignore virtualization-based context switches.
85 */ 85 */
86 static inline void rcu_virt_note_context_switch(int cpu) 86 static inline void rcu_virt_note_context_switch(int cpu)
87 { 87 {
88 } 88 }
89 89
90 /* 90 /*
91 * Return the number of grace periods. 91 * Return the number of grace periods.
92 */ 92 */
93 static inline long rcu_batches_completed(void) 93 static inline long rcu_batches_completed(void)
94 { 94 {
95 return 0; 95 return 0;
96 } 96 }
97 97
98 /* 98 /*
99 * Return the number of bottom-half grace periods. 99 * Return the number of bottom-half grace periods.
100 */ 100 */
101 static inline long rcu_batches_completed_bh(void) 101 static inline long rcu_batches_completed_bh(void)
102 { 102 {
103 return 0; 103 return 0;
104 } 104 }
105 105
106 static inline void rcu_force_quiescent_state(void) 106 static inline void rcu_force_quiescent_state(void)
107 { 107 {
108 } 108 }
109 109
110 static inline void rcu_bh_force_quiescent_state(void) 110 static inline void rcu_bh_force_quiescent_state(void)
111 { 111 {
112 } 112 }
113 113
114 static inline void rcu_sched_force_quiescent_state(void) 114 static inline void rcu_sched_force_quiescent_state(void)
115 { 115 {
116 } 116 }
117 117
118 static inline void rcu_cpu_stall_reset(void) 118 static inline void rcu_cpu_stall_reset(void)
119 { 119 {
120 } 120 }
121 121
122 static inline void exit_rcu(void)
123 {
124 }
125
122 #ifdef CONFIG_DEBUG_LOCK_ALLOC 126 #ifdef CONFIG_DEBUG_LOCK_ALLOC
123 extern int rcu_scheduler_active __read_mostly; 127 extern int rcu_scheduler_active __read_mostly;
124 extern void rcu_scheduler_starting(void); 128 extern void rcu_scheduler_starting(void);
125 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 129 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
126 static inline void rcu_scheduler_starting(void) 130 static inline void rcu_scheduler_starting(void)
127 { 131 {
128 } 132 }
129 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 133 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
130 134
131 #endif /* __LINUX_RCUTINY_H */ 135 #endif /* __LINUX_RCUTINY_H */
132 136
include/linux/rcutree.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2008 18 * Copyright IBM Corporation, 2008
19 * 19 *
20 * Author: Dipankar Sarma <dipankar@in.ibm.com> 20 * Author: Dipankar Sarma <dipankar@in.ibm.com>
21 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm 21 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
22 * 22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * 25 *
26 * For detailed explanation of Read-Copy Update mechanism see - 26 * For detailed explanation of Read-Copy Update mechanism see -
27 * Documentation/RCU 27 * Documentation/RCU
28 */ 28 */
29 29
30 #ifndef __LINUX_RCUTREE_H 30 #ifndef __LINUX_RCUTREE_H
31 #define __LINUX_RCUTREE_H 31 #define __LINUX_RCUTREE_H
32 32
33 extern void rcu_note_context_switch(int cpu); 33 extern void rcu_note_context_switch(int cpu);
34 extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); 34 extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
35 extern void rcu_cpu_stall_reset(void); 35 extern void rcu_cpu_stall_reset(void);
36 36
37 /* 37 /*
38 * Note a virtualization-based context switch. This is simply a 38 * Note a virtualization-based context switch. This is simply a
39 * wrapper around rcu_note_context_switch(), which allows TINY_RCU 39 * wrapper around rcu_note_context_switch(), which allows TINY_RCU
40 * to save a few bytes. 40 * to save a few bytes.
41 */ 41 */
42 static inline void rcu_virt_note_context_switch(int cpu) 42 static inline void rcu_virt_note_context_switch(int cpu)
43 { 43 {
44 rcu_note_context_switch(cpu); 44 rcu_note_context_switch(cpu);
45 } 45 }
46 46
47 extern void synchronize_rcu_bh(void); 47 extern void synchronize_rcu_bh(void);
48 extern void synchronize_sched_expedited(void); 48 extern void synchronize_sched_expedited(void);
49 extern void synchronize_rcu_expedited(void); 49 extern void synchronize_rcu_expedited(void);
50 50
51 void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 51 void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
52 52
53 /** 53 /**
54 * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period 54 * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
55 * 55 *
56 * Wait for an RCU-bh grace period to elapse, but use a "big hammer" 56 * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
57 * approach to force the grace period to end quickly. This consumes 57 * approach to force the grace period to end quickly. This consumes
58 * significant time on all CPUs and is unfriendly to real-time workloads, 58 * significant time on all CPUs and is unfriendly to real-time workloads,
59 * and is thus not recommended for any sort of common-case code. In fact, 59 * and is thus not recommended for any sort of common-case code. In fact,
60 * if you are using synchronize_rcu_bh_expedited() in a loop, please 60 * if you are using synchronize_rcu_bh_expedited() in a loop, please
61 * restructure your code to batch your updates, and then use a single 61 * restructure your code to batch your updates, and then use a single
62 * synchronize_rcu_bh() instead. 62 * synchronize_rcu_bh() instead.
63 * 63 *
64 * Note that it is illegal to call this function while holding any lock 64 * Note that it is illegal to call this function while holding any lock
65 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 65 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
66 * to call this function from a CPU-hotplug notifier. Failing to observe 66 * to call this function from a CPU-hotplug notifier. Failing to observe
67 * these restrictions will result in deadlock. 67 * these restrictions will result in deadlock.
68 */ 68 */
69 static inline void synchronize_rcu_bh_expedited(void) 69 static inline void synchronize_rcu_bh_expedited(void)
70 { 70 {
71 synchronize_sched_expedited(); 71 synchronize_sched_expedited();
72 } 72 }
73 73
74 extern void rcu_barrier(void); 74 extern void rcu_barrier(void);
75 extern void rcu_barrier_bh(void); 75 extern void rcu_barrier_bh(void);
76 extern void rcu_barrier_sched(void); 76 extern void rcu_barrier_sched(void);
77 77
78 extern unsigned long rcutorture_testseq; 78 extern unsigned long rcutorture_testseq;
79 extern unsigned long rcutorture_vernum; 79 extern unsigned long rcutorture_vernum;
80 extern long rcu_batches_completed(void); 80 extern long rcu_batches_completed(void);
81 extern long rcu_batches_completed_bh(void); 81 extern long rcu_batches_completed_bh(void);
82 extern long rcu_batches_completed_sched(void); 82 extern long rcu_batches_completed_sched(void);
83 83
84 extern void rcu_force_quiescent_state(void); 84 extern void rcu_force_quiescent_state(void);
85 extern void rcu_bh_force_quiescent_state(void); 85 extern void rcu_bh_force_quiescent_state(void);
86 extern void rcu_sched_force_quiescent_state(void); 86 extern void rcu_sched_force_quiescent_state(void);
87 87
88 extern void exit_rcu(void);
89
88 extern void rcu_scheduler_starting(void); 90 extern void rcu_scheduler_starting(void);
89 extern int rcu_scheduler_active __read_mostly; 91 extern int rcu_scheduler_active __read_mostly;
90 92
91 #endif /* __LINUX_RCUTREE_H */ 93 #endif /* __LINUX_RCUTREE_H */
92 94
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion 2 * Read-Copy Update mechanism for mutual exclusion
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version. 7 * (at your option) any later version.
8 * 8 *
9 * This program is distributed in the hope that it will be useful, 9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * 17 *
18 * Copyright IBM Corporation, 2001 18 * Copyright IBM Corporation, 2001
19 * 19 *
20 * Authors: Dipankar Sarma <dipankar@in.ibm.com> 20 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
21 * Manfred Spraul <manfred@colorfullife.com> 21 * Manfred Spraul <manfred@colorfullife.com>
22 * 22 *
23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com> 23 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 24 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
25 * Papers: 25 * Papers:
26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf 26 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) 27 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
28 * 28 *
29 * For detailed explanation of Read-Copy Update mechanism see - 29 * For detailed explanation of Read-Copy Update mechanism see -
30 * http://lse.sourceforge.net/locking/rcupdate.html 30 * http://lse.sourceforge.net/locking/rcupdate.html
31 * 31 *
32 */ 32 */
33 #include <linux/types.h> 33 #include <linux/types.h>
34 #include <linux/kernel.h> 34 #include <linux/kernel.h>
35 #include <linux/init.h> 35 #include <linux/init.h>
36 #include <linux/spinlock.h> 36 #include <linux/spinlock.h>
37 #include <linux/smp.h> 37 #include <linux/smp.h>
38 #include <linux/interrupt.h> 38 #include <linux/interrupt.h>
39 #include <linux/sched.h> 39 #include <linux/sched.h>
40 #include <linux/atomic.h> 40 #include <linux/atomic.h>
41 #include <linux/bitops.h> 41 #include <linux/bitops.h>
42 #include <linux/percpu.h> 42 #include <linux/percpu.h>
43 #include <linux/notifier.h> 43 #include <linux/notifier.h>
44 #include <linux/cpu.h> 44 #include <linux/cpu.h>
45 #include <linux/mutex.h> 45 #include <linux/mutex.h>
46 #include <linux/export.h> 46 #include <linux/export.h>
47 #include <linux/hardirq.h> 47 #include <linux/hardirq.h>
48 #include <linux/delay.h> 48 #include <linux/delay.h>
49 #include <linux/module.h> 49 #include <linux/module.h>
50 50
51 #define CREATE_TRACE_POINTS 51 #define CREATE_TRACE_POINTS
52 #include <trace/events/rcu.h> 52 #include <trace/events/rcu.h>
53 53
54 #include "rcu.h" 54 #include "rcu.h"
55 55
56 module_param(rcu_expedited, int, 0); 56 module_param(rcu_expedited, int, 0);
57 57
58 #ifdef CONFIG_PREEMPT_RCU 58 #ifdef CONFIG_PREEMPT_RCU
59 59
60 /* 60 /*
61 * Preemptible RCU implementation for rcu_read_lock(). 61 * Preemptible RCU implementation for rcu_read_lock().
62 * Just increment ->rcu_read_lock_nesting, shared state will be updated 62 * Just increment ->rcu_read_lock_nesting, shared state will be updated
63 * if we block. 63 * if we block.
64 */ 64 */
65 void __rcu_read_lock(void) 65 void __rcu_read_lock(void)
66 { 66 {
67 current->rcu_read_lock_nesting++; 67 current->rcu_read_lock_nesting++;
68 barrier(); /* critical section after entry code. */ 68 barrier(); /* critical section after entry code. */
69 } 69 }
70 EXPORT_SYMBOL_GPL(__rcu_read_lock); 70 EXPORT_SYMBOL_GPL(__rcu_read_lock);
71 71
72 /* 72 /*
73 * Preemptible RCU implementation for rcu_read_unlock(). 73 * Preemptible RCU implementation for rcu_read_unlock().
74 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost 74 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
75 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then 75 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
76 * invoke rcu_read_unlock_special() to clean up after a context switch 76 * invoke rcu_read_unlock_special() to clean up after a context switch
77 * in an RCU read-side critical section and other special cases. 77 * in an RCU read-side critical section and other special cases.
78 */ 78 */
79 void __rcu_read_unlock(void) 79 void __rcu_read_unlock(void)
80 { 80 {
81 struct task_struct *t = current; 81 struct task_struct *t = current;
82 82
83 if (t->rcu_read_lock_nesting != 1) { 83 if (t->rcu_read_lock_nesting != 1) {
84 --t->rcu_read_lock_nesting; 84 --t->rcu_read_lock_nesting;
85 } else { 85 } else {
86 barrier(); /* critical section before exit code. */ 86 barrier(); /* critical section before exit code. */
87 t->rcu_read_lock_nesting = INT_MIN; 87 t->rcu_read_lock_nesting = INT_MIN;
88 #ifdef CONFIG_PROVE_RCU_DELAY 88 #ifdef CONFIG_PROVE_RCU_DELAY
89 udelay(10); /* Make preemption more probable. */ 89 udelay(10); /* Make preemption more probable. */
90 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ 90 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
91 barrier(); /* assign before ->rcu_read_unlock_special load */ 91 barrier(); /* assign before ->rcu_read_unlock_special load */
92 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 92 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
93 rcu_read_unlock_special(t); 93 rcu_read_unlock_special(t);
94 barrier(); /* ->rcu_read_unlock_special load before assign */ 94 barrier(); /* ->rcu_read_unlock_special load before assign */
95 t->rcu_read_lock_nesting = 0; 95 t->rcu_read_lock_nesting = 0;
96 } 96 }
97 #ifdef CONFIG_PROVE_LOCKING 97 #ifdef CONFIG_PROVE_LOCKING
98 { 98 {
99 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting); 99 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
100 100
101 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); 101 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
102 } 102 }
103 #endif /* #ifdef CONFIG_PROVE_LOCKING */ 103 #endif /* #ifdef CONFIG_PROVE_LOCKING */
104 } 104 }
105 EXPORT_SYMBOL_GPL(__rcu_read_unlock); 105 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
106 106
107 /* 107 #endif /* #ifdef CONFIG_PREEMPT_RCU */
108 * Check for a task exiting while in a preemptible-RCU read-side
109 * critical section, clean up if so. No need to issue warnings,
110 * as debug_check_no_locks_held() already does this if lockdep
111 * is enabled.
112 */
113 void exit_rcu(void)
114 {
115 struct task_struct *t = current;
116
117 if (likely(list_empty(&current->rcu_node_entry)))
118 return;
119 t->rcu_read_lock_nesting = 1;
120 barrier();
121 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
122 __rcu_read_unlock();
123 }
124
125 #else /* #ifdef CONFIG_PREEMPT_RCU */
126
127 void exit_rcu(void)
128 {
129 }
130
131 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
132 108
133 #ifdef CONFIG_DEBUG_LOCK_ALLOC 109 #ifdef CONFIG_DEBUG_LOCK_ALLOC
134 static struct lock_class_key rcu_lock_key; 110 static struct lock_class_key rcu_lock_key;
135 struct lockdep_map rcu_lock_map = 111 struct lockdep_map rcu_lock_map =
136 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); 112 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
137 EXPORT_SYMBOL_GPL(rcu_lock_map); 113 EXPORT_SYMBOL_GPL(rcu_lock_map);
138 114
139 static struct lock_class_key rcu_bh_lock_key; 115 static struct lock_class_key rcu_bh_lock_key;
140 struct lockdep_map rcu_bh_lock_map = 116 struct lockdep_map rcu_bh_lock_map =
141 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key); 117 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
142 EXPORT_SYMBOL_GPL(rcu_bh_lock_map); 118 EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
143 119
144 static struct lock_class_key rcu_sched_lock_key; 120 static struct lock_class_key rcu_sched_lock_key;
145 struct lockdep_map rcu_sched_lock_map = 121 struct lockdep_map rcu_sched_lock_map =
146 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); 122 STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
147 EXPORT_SYMBOL_GPL(rcu_sched_lock_map); 123 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
148 #endif 124 #endif
149 125
150 #ifdef CONFIG_DEBUG_LOCK_ALLOC 126 #ifdef CONFIG_DEBUG_LOCK_ALLOC
151 127
152 int debug_lockdep_rcu_enabled(void) 128 int debug_lockdep_rcu_enabled(void)
153 { 129 {
154 return rcu_scheduler_active && debug_locks && 130 return rcu_scheduler_active && debug_locks &&
155 current->lockdep_recursion == 0; 131 current->lockdep_recursion == 0;
156 } 132 }
157 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 133 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
158 134
159 /** 135 /**
160 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? 136 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
161 * 137 *
162 * Check for bottom half being disabled, which covers both the 138 * Check for bottom half being disabled, which covers both the
163 * CONFIG_PROVE_RCU and not cases. Note that if someone uses 139 * CONFIG_PROVE_RCU and not cases. Note that if someone uses
164 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) 140 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
165 * will show the situation. This is useful for debug checks in functions 141 * will show the situation. This is useful for debug checks in functions
166 * that require that they be called within an RCU read-side critical 142 * that require that they be called within an RCU read-side critical
167 * section. 143 * section.
168 * 144 *
169 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. 145 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
170 * 146 *
171 * Note that rcu_read_lock() is disallowed if the CPU is either idle or 147 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
172 * offline from an RCU perspective, so check for those as well. 148 * offline from an RCU perspective, so check for those as well.
173 */ 149 */
174 int rcu_read_lock_bh_held(void) 150 int rcu_read_lock_bh_held(void)
175 { 151 {
176 if (!debug_lockdep_rcu_enabled()) 152 if (!debug_lockdep_rcu_enabled())
177 return 1; 153 return 1;
178 if (rcu_is_cpu_idle()) 154 if (rcu_is_cpu_idle())
179 return 0; 155 return 0;
180 if (!rcu_lockdep_current_cpu_online()) 156 if (!rcu_lockdep_current_cpu_online())
181 return 0; 157 return 0;
182 return in_softirq() || irqs_disabled(); 158 return in_softirq() || irqs_disabled();
183 } 159 }
184 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); 160 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
185 161
186 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 162 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
187 163
188 struct rcu_synchronize { 164 struct rcu_synchronize {
189 struct rcu_head head; 165 struct rcu_head head;
190 struct completion completion; 166 struct completion completion;
191 }; 167 };
192 168
193 /* 169 /*
194 * Awaken the corresponding synchronize_rcu() instance now that a 170 * Awaken the corresponding synchronize_rcu() instance now that a
195 * grace period has elapsed. 171 * grace period has elapsed.
196 */ 172 */
197 static void wakeme_after_rcu(struct rcu_head *head) 173 static void wakeme_after_rcu(struct rcu_head *head)
198 { 174 {
199 struct rcu_synchronize *rcu; 175 struct rcu_synchronize *rcu;
200 176
201 rcu = container_of(head, struct rcu_synchronize, head); 177 rcu = container_of(head, struct rcu_synchronize, head);
202 complete(&rcu->completion); 178 complete(&rcu->completion);
203 } 179 }
204 180
205 void wait_rcu_gp(call_rcu_func_t crf) 181 void wait_rcu_gp(call_rcu_func_t crf)
206 { 182 {
207 struct rcu_synchronize rcu; 183 struct rcu_synchronize rcu;
208 184
209 init_rcu_head_on_stack(&rcu.head); 185 init_rcu_head_on_stack(&rcu.head);
210 init_completion(&rcu.completion); 186 init_completion(&rcu.completion);
211 /* Will wake me after RCU finished. */ 187 /* Will wake me after RCU finished. */
212 crf(&rcu.head, wakeme_after_rcu); 188 crf(&rcu.head, wakeme_after_rcu);
213 /* Wait for it. */ 189 /* Wait for it. */
214 wait_for_completion(&rcu.completion); 190 wait_for_completion(&rcu.completion);
215 destroy_rcu_head_on_stack(&rcu.head); 191 destroy_rcu_head_on_stack(&rcu.head);
216 } 192 }
217 EXPORT_SYMBOL_GPL(wait_rcu_gp); 193 EXPORT_SYMBOL_GPL(wait_rcu_gp);
218 194
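For illustration, wait_rcu_gp() is the building block behind the synchronize_*() family; for example, the TINY_RCU rcu_barrier_bh() earlier in this diff is simply wait_rcu_gp(call_rcu_bh). A hypothetical wrapper might look like:

/* Illustrative only: a synchronize_rcu()-style wait built on wait_rcu_gp(). */
static void my_synchronize_rcu(void)
{
	wait_rcu_gp(call_rcu);	/* Queue a callback and block until it has run. */
}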
219 #ifdef CONFIG_PROVE_RCU 195 #ifdef CONFIG_PROVE_RCU
220 /* 196 /*
221 * wrapper function to avoid #include problems. 197 * wrapper function to avoid #include problems.
222 */ 198 */
223 int rcu_my_thread_group_empty(void) 199 int rcu_my_thread_group_empty(void)
224 { 200 {
225 return thread_group_empty(current); 201 return thread_group_empty(current);
226 } 202 }
227 EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); 203 EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
228 #endif /* #ifdef CONFIG_PROVE_RCU */ 204 #endif /* #ifdef CONFIG_PROVE_RCU */
229 205
230 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 206 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
231 static inline void debug_init_rcu_head(struct rcu_head *head) 207 static inline void debug_init_rcu_head(struct rcu_head *head)
232 { 208 {
233 debug_object_init(head, &rcuhead_debug_descr); 209 debug_object_init(head, &rcuhead_debug_descr);
234 } 210 }
235 211
236 static inline void debug_rcu_head_free(struct rcu_head *head) 212 static inline void debug_rcu_head_free(struct rcu_head *head)
237 { 213 {
238 debug_object_free(head, &rcuhead_debug_descr); 214 debug_object_free(head, &rcuhead_debug_descr);
239 } 215 }
240 216
241 /* 217 /*
242 * fixup_init is called when: 218 * fixup_init is called when:
243 * - an active object is initialized 219 * - an active object is initialized
244 */ 220 */
245 static int rcuhead_fixup_init(void *addr, enum debug_obj_state state) 221 static int rcuhead_fixup_init(void *addr, enum debug_obj_state state)
246 { 222 {
247 struct rcu_head *head = addr; 223 struct rcu_head *head = addr;
248 224
249 switch (state) { 225 switch (state) {
250 case ODEBUG_STATE_ACTIVE: 226 case ODEBUG_STATE_ACTIVE:
251 /* 227 /*
252 * Ensure that queued callbacks are all executed. 228 * Ensure that queued callbacks are all executed.
253 * If we detect that we are nested in a RCU read-side critical 229 * If we detect that we are nested in a RCU read-side critical
254 * section, we should simply fail, otherwise we would deadlock. 230 * section, we should simply fail, otherwise we would deadlock.
255 * In !PREEMPT configurations, there is no way to tell if we are 231 * In !PREEMPT configurations, there is no way to tell if we are
256 * in a RCU read-side critical section or not, so we never 232 * in a RCU read-side critical section or not, so we never
257 * attempt any fixup and just print a warning. 233 * attempt any fixup and just print a warning.
258 */ 234 */
259 #ifndef CONFIG_PREEMPT 235 #ifndef CONFIG_PREEMPT
260 WARN_ON_ONCE(1); 236 WARN_ON_ONCE(1);
261 return 0; 237 return 0;
262 #endif 238 #endif
263 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 239 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
264 irqs_disabled()) { 240 irqs_disabled()) {
265 WARN_ON_ONCE(1); 241 WARN_ON_ONCE(1);
266 return 0; 242 return 0;
267 } 243 }
268 rcu_barrier(); 244 rcu_barrier();
269 rcu_barrier_sched(); 245 rcu_barrier_sched();
270 rcu_barrier_bh(); 246 rcu_barrier_bh();
271 debug_object_init(head, &rcuhead_debug_descr); 247 debug_object_init(head, &rcuhead_debug_descr);
272 return 1; 248 return 1;
273 default: 249 default:
274 return 0; 250 return 0;
275 } 251 }
276 } 252 }
277 253
278 /* 254 /*
279 * fixup_activate is called when: 255 * fixup_activate is called when:
280 * - an active object is activated 256 * - an active object is activated
281 * - an unknown object is activated (might be a statically initialized object) 257 * - an unknown object is activated (might be a statically initialized object)
282 * Activation is performed internally by call_rcu(). 258 * Activation is performed internally by call_rcu().
283 */ 259 */
284 static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state) 260 static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state)
285 { 261 {
286 struct rcu_head *head = addr; 262 struct rcu_head *head = addr;
287 263
288 switch (state) { 264 switch (state) {
289 265
290 case ODEBUG_STATE_NOTAVAILABLE: 266 case ODEBUG_STATE_NOTAVAILABLE:
291 /* 267 /*
292 * This is not really a fixup. We just make sure that it is 268 * This is not really a fixup. We just make sure that it is
293 * tracked in the object tracker. 269 * tracked in the object tracker.
294 */ 270 */
295 debug_object_init(head, &rcuhead_debug_descr); 271 debug_object_init(head, &rcuhead_debug_descr);
296 debug_object_activate(head, &rcuhead_debug_descr); 272 debug_object_activate(head, &rcuhead_debug_descr);
297 return 0; 273 return 0;
298 274
299 case ODEBUG_STATE_ACTIVE: 275 case ODEBUG_STATE_ACTIVE:
300 /* 276 /*
301 * Ensure that queued callbacks are all executed. 277 * Ensure that queued callbacks are all executed.
302 * If we detect that we are nested in an RCU read-side critical 278 * If we detect that we are nested in an RCU read-side critical
303 * section, we should simply fail, otherwise we would deadlock. 279 * section, we should simply fail, otherwise we would deadlock.
304 * In !PREEMPT configurations, there is no way to tell if we are 280 * In !PREEMPT configurations, there is no way to tell if we are
305 * in an RCU read-side critical section or not, so we never 281 * in an RCU read-side critical section or not, so we never
306 * attempt any fixup and just print a warning. 282 * attempt any fixup and just print a warning.
307 */ 283 */
308 #ifndef CONFIG_PREEMPT 284 #ifndef CONFIG_PREEMPT
309 WARN_ON_ONCE(1); 285 WARN_ON_ONCE(1);
310 return 0; 286 return 0;
311 #endif 287 #endif
312 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 288 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
313 irqs_disabled()) { 289 irqs_disabled()) {
314 WARN_ON_ONCE(1); 290 WARN_ON_ONCE(1);
315 return 0; 291 return 0;
316 } 292 }
317 rcu_barrier(); 293 rcu_barrier();
318 rcu_barrier_sched(); 294 rcu_barrier_sched();
319 rcu_barrier_bh(); 295 rcu_barrier_bh();
320 debug_object_activate(head, &rcuhead_debug_descr); 296 debug_object_activate(head, &rcuhead_debug_descr);
321 return 1; 297 return 1;
322 default: 298 default:
323 return 0; 299 return 0;
324 } 300 }
325 } 301 }
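An illustrative sketch (not part of this commit) of the kind of error these fixup hooks catch: queueing the same rcu_head twice before its callback has run leaves the debug object in ODEBUG_STATE_ACTIVE, so the second call_rcu() lands in rcuhead_fixup_activate() above. The qux structure and callback are hypothetical; the usual <linux/rcupdate.h> and <linux/slab.h> includes are assumed.

struct qux {
	struct rcu_head rcu;
	int data;
};

static void qux_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct qux, rcu));
}

static void qux_buggy_double_queue(struct qux *p)
{
	call_rcu(&p->rcu, qux_cb);
	call_rcu(&p->rcu, qux_cb);	/* Bug: the rcu_head is still active. */
}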
326 302
327 /* 303 /*
328 * fixup_free is called when: 304 * fixup_free is called when:
329 * - an active object is freed 305 * - an active object is freed
330 */ 306 */
331 static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) 307 static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
332 { 308 {
333 struct rcu_head *head = addr; 309 struct rcu_head *head = addr;
334 310
335 switch (state) { 311 switch (state) {
336 case ODEBUG_STATE_ACTIVE: 312 case ODEBUG_STATE_ACTIVE:
337 /* 313 /*
338 * Ensure that queued callbacks are all executed. 314 * Ensure that queued callbacks are all executed.
339 * If we detect that we are nested in an RCU read-side critical 315 * If we detect that we are nested in an RCU read-side critical
340 * section, we should simply fail, otherwise we would deadlock. 316 * section, we should simply fail, otherwise we would deadlock.
341 * In !PREEMPT configurations, there is no way to tell if we are 317 * In !PREEMPT configurations, there is no way to tell if we are
342 * in an RCU read-side critical section or not, so we never 318 * in an RCU read-side critical section or not, so we never
343 * attempt any fixup and just print a warning. 319 * attempt any fixup and just print a warning.
344 */ 320 */
345 #ifndef CONFIG_PREEMPT 321 #ifndef CONFIG_PREEMPT
346 WARN_ON_ONCE(1); 322 WARN_ON_ONCE(1);
347 return 0; 323 return 0;
348 #endif 324 #endif
349 if (rcu_preempt_depth() != 0 || preempt_count() != 0 || 325 if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
350 irqs_disabled()) { 326 irqs_disabled()) {
351 WARN_ON_ONCE(1); 327 WARN_ON_ONCE(1);
352 return 0; 328 return 0;
353 } 329 }
354 rcu_barrier(); 330 rcu_barrier();
355 rcu_barrier_sched(); 331 rcu_barrier_sched();
356 rcu_barrier_bh(); 332 rcu_barrier_bh();
357 debug_object_free(head, &rcuhead_debug_descr); 333 debug_object_free(head, &rcuhead_debug_descr);
358 return 1; 334 return 1;
359 default: 335 default:
360 return 0; 336 return 0;
361 } 337 }
362 } 338 }
363 339
364 /** 340 /**
365 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects 341 * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects
366 * @head: pointer to rcu_head structure to be initialized 342 * @head: pointer to rcu_head structure to be initialized
367 * 343 *
368 * This function informs debugobjects of a new rcu_head structure that 344 * This function informs debugobjects of a new rcu_head structure that
369 * has been allocated as an auto variable on the stack. This function 345 * has been allocated as an auto variable on the stack. This function
370 * is not required for rcu_head structures that are statically defined or 346 * is not required for rcu_head structures that are statically defined or
371 * that are dynamically allocated on the heap. This function has no 347 * that are dynamically allocated on the heap. This function has no
372 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. 348 * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
373 */ 349 */
374 void init_rcu_head_on_stack(struct rcu_head *head) 350 void init_rcu_head_on_stack(struct rcu_head *head)
375 { 351 {
376 debug_object_init_on_stack(head, &rcuhead_debug_descr); 352 debug_object_init_on_stack(head, &rcuhead_debug_descr);
377 } 353 }
378 EXPORT_SYMBOL_GPL(init_rcu_head_on_stack); 354 EXPORT_SYMBOL_GPL(init_rcu_head_on_stack);
379 355
380 /** 356 /**
381 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects 357 * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects
382 * @head: pointer to rcu_head structure that is about to go out of scope 358 * @head: pointer to rcu_head structure that is about to go out of scope
383 * 359 *
384 * This function informs debugobjects that an on-stack rcu_head structure 360 * This function informs debugobjects that an on-stack rcu_head structure
385 * is about to go out of scope. As with init_rcu_head_on_stack(), this 361 * is about to go out of scope. As with init_rcu_head_on_stack(), this
386 * function is not required for rcu_head structures that are statically 362 * function is not required for rcu_head structures that are statically
387 * defined or that are dynamically allocated on the heap. Also as with 363 * defined or that are dynamically allocated on the heap. Also as with
388 * init_rcu_head_on_stack(), this function has no effect for 364 * init_rcu_head_on_stack(), this function has no effect for
389 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. 365 * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds.
390 */ 366 */
391 void destroy_rcu_head_on_stack(struct rcu_head *head) 367 void destroy_rcu_head_on_stack(struct rcu_head *head)
392 { 368 {
393 debug_object_free(head, &rcuhead_debug_descr); 369 debug_object_free(head, &rcuhead_debug_descr);
394 } 370 }
395 EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack); 371 EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
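A minimal usage sketch (not part of this commit) for the two on-stack helpers above: an rcu_head that lives in a local variable is announced to debugobjects before being passed to call_rcu() and retired before its stack frame goes away. The stack_demo structure, callback, and completion are hypothetical.

struct stack_demo {
	struct rcu_head rh;
	struct completion done;
};

static void stack_demo_cb(struct rcu_head *rhp)
{
	complete(&container_of(rhp, struct stack_demo, rh)->done);
}

static void stack_demo_wait_for_gp(void)
{
	struct stack_demo d;

	init_completion(&d.done);
	init_rcu_head_on_stack(&d.rh);		/* Tell debugobjects about the on-stack rcu_head. */
	call_rcu(&d.rh, stack_demo_cb);
	wait_for_completion(&d.done);		/* Callback has run; the rcu_head is idle again. */
	destroy_rcu_head_on_stack(&d.rh);	/* Must happen before leaving this frame. */
}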
396 372
397 struct debug_obj_descr rcuhead_debug_descr = { 373 struct debug_obj_descr rcuhead_debug_descr = {
398 .name = "rcu_head", 374 .name = "rcu_head",
399 .fixup_init = rcuhead_fixup_init, 375 .fixup_init = rcuhead_fixup_init,
400 .fixup_activate = rcuhead_fixup_activate, 376 .fixup_activate = rcuhead_fixup_activate,
401 .fixup_free = rcuhead_fixup_free, 377 .fixup_free = rcuhead_fixup_free,
402 }; 378 };
403 EXPORT_SYMBOL_GPL(rcuhead_debug_descr); 379 EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
404 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 380 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
405 381
406 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) 382 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
407 void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp, 383 void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
408 unsigned long secs, 384 unsigned long secs,
409 unsigned long c_old, unsigned long c) 385 unsigned long c_old, unsigned long c)
410 { 386 {
411 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c); 387 trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
412 } 388 }
413 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); 389 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
414 #else 390 #else
415 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ 391 #define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
416 do { } while (0) 392 do { } while (0)
417 #endif 393 #endif
418 394
419 #ifdef CONFIG_RCU_STALL_COMMON 395 #ifdef CONFIG_RCU_STALL_COMMON
420 396
421 #ifdef CONFIG_PROVE_RCU 397 #ifdef CONFIG_PROVE_RCU
422 #define RCU_STALL_DELAY_DELTA (5 * HZ) 398 #define RCU_STALL_DELAY_DELTA (5 * HZ)
423 #else 399 #else
424 #define RCU_STALL_DELAY_DELTA 0 400 #define RCU_STALL_DELAY_DELTA 0
425 #endif 401 #endif
426 402
427 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ 403 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
428 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; 404 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
429 405
430 module_param(rcu_cpu_stall_suppress, int, 0644); 406 module_param(rcu_cpu_stall_suppress, int, 0644);
431 module_param(rcu_cpu_stall_timeout, int, 0644); 407 module_param(rcu_cpu_stall_timeout, int, 0644);
432 408
433 int rcu_jiffies_till_stall_check(void) 409 int rcu_jiffies_till_stall_check(void)
434 { 410 {
435 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); 411 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
436 412
437 /* 413 /*
438 * Limit check must be consistent with the Kconfig limits 414 * Limit check must be consistent with the Kconfig limits
439 * for CONFIG_RCU_CPU_STALL_TIMEOUT. 415 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
440 */ 416 */
441 if (till_stall_check < 3) { 417 if (till_stall_check < 3) {
442 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; 418 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
443 till_stall_check = 3; 419 till_stall_check = 3;
444 } else if (till_stall_check > 300) { 420 } else if (till_stall_check > 300) {
445 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; 421 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
446 till_stall_check = 300; 422 till_stall_check = 300;
447 } 423 }
448 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 424 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
449 } 425 }
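As a worked example (not part of this commit): with HZ=1000, CONFIG_PROVE_RCU=n, and CONFIG_RCU_CPU_STALL_TIMEOUT=21, the function above returns 21 * 1000 = 21000 jiffies, while a runtime write of 1 to the rcu_cpu_stall_timeout module parameter is clamped to 3, giving 3000 jiffies. A hypothetical caller might use the value like this:

/* Hypothetical helper: has the grace period that began at gp_start stalled? */
static bool demo_gp_looks_stalled(unsigned long gp_start)
{
	return time_after(jiffies, gp_start + rcu_jiffies_till_stall_check());
}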
450 426
451 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) 427 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
452 { 428 {
453 rcu_cpu_stall_suppress = 1; 429 rcu_cpu_stall_suppress = 1;
454 return NOTIFY_DONE; 430 return NOTIFY_DONE;
455 } 431 }
456 432
457 static struct notifier_block rcu_panic_block = { 433 static struct notifier_block rcu_panic_block = {
458 .notifier_call = rcu_panic, 434 .notifier_call = rcu_panic,
459 }; 435 };
460 436
461 static int __init check_cpu_stall_init(void) 437 static int __init check_cpu_stall_init(void)
462 { 438 {
463 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); 439 atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
464 return 0; 440 return 0;
465 } 441 }
466 early_initcall(check_cpu_stall_init); 442 early_initcall(check_cpu_stall_init);
467 443
468 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 444 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
469 445
kernel/rcutree_plugin.h
1 /* 1 /*
2 * Read-Copy Update mechanism for mutual exclusion (tree-based version) 2 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
3 * Internal non-public definitions that provide either classic 3 * Internal non-public definitions that provide either classic
4 * or preemptible semantics. 4 * or preemptible semantics.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version. 9 * (at your option) any later version.
10 * 10 *
11 * This program is distributed in the hope that it will be useful, 11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details. 14 * GNU General Public License for more details.
15 * 15 *
16 * You should have received a copy of the GNU General Public License 16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * 19 *
20 * Copyright Red Hat, 2009 20 * Copyright Red Hat, 2009
21 * Copyright IBM Corporation, 2009 21 * Copyright IBM Corporation, 2009
22 * 22 *
23 * Author: Ingo Molnar <mingo@elte.hu> 23 * Author: Ingo Molnar <mingo@elte.hu>
24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com> 24 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
25 */ 25 */
26 26
27 #include <linux/delay.h> 27 #include <linux/delay.h>
28 #include <linux/gfp.h> 28 #include <linux/gfp.h>
29 #include <linux/oom.h> 29 #include <linux/oom.h>
30 #include <linux/smpboot.h> 30 #include <linux/smpboot.h>
31 #include <linux/tick.h> 31 #include <linux/tick.h>
32 32
33 #define RCU_KTHREAD_PRIO 1 33 #define RCU_KTHREAD_PRIO 1
34 34
35 #ifdef CONFIG_RCU_BOOST 35 #ifdef CONFIG_RCU_BOOST
36 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO 36 #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
37 #else 37 #else
38 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO 38 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
39 #endif 39 #endif
40 40
41 #ifdef CONFIG_RCU_NOCB_CPU 41 #ifdef CONFIG_RCU_NOCB_CPU
42 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ 42 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
43 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ 43 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
44 static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */ 44 static bool __read_mostly rcu_nocb_poll; /* Offload kthreads are to poll. */
45 static char __initdata nocb_buf[NR_CPUS * 5]; 45 static char __initdata nocb_buf[NR_CPUS * 5];
46 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 46 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
47 47
48 /* 48 /*
49 * Check the RCU kernel configuration parameters and print informative 49 * Check the RCU kernel configuration parameters and print informative
50 * messages about anything out of the ordinary. If you like #ifdef, you 50 * messages about anything out of the ordinary. If you like #ifdef, you
51 * will love this function. 51 * will love this function.
52 */ 52 */
53 static void __init rcu_bootup_announce_oddness(void) 53 static void __init rcu_bootup_announce_oddness(void)
54 { 54 {
55 #ifdef CONFIG_RCU_TRACE 55 #ifdef CONFIG_RCU_TRACE
56 pr_info("\tRCU debugfs-based tracing is enabled.\n"); 56 pr_info("\tRCU debugfs-based tracing is enabled.\n");
57 #endif 57 #endif
58 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) 58 #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
59 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 59 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
60 CONFIG_RCU_FANOUT); 60 CONFIG_RCU_FANOUT);
61 #endif 61 #endif
62 #ifdef CONFIG_RCU_FANOUT_EXACT 62 #ifdef CONFIG_RCU_FANOUT_EXACT
63 pr_info("\tHierarchical RCU autobalancing is disabled.\n"); 63 pr_info("\tHierarchical RCU autobalancing is disabled.\n");
64 #endif 64 #endif
65 #ifdef CONFIG_RCU_FAST_NO_HZ 65 #ifdef CONFIG_RCU_FAST_NO_HZ
66 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); 66 pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
67 #endif 67 #endif
68 #ifdef CONFIG_PROVE_RCU 68 #ifdef CONFIG_PROVE_RCU
69 pr_info("\tRCU lockdep checking is enabled.\n"); 69 pr_info("\tRCU lockdep checking is enabled.\n");
70 #endif 70 #endif
71 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE 71 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
72 pr_info("\tRCU torture testing starts during boot.\n"); 72 pr_info("\tRCU torture testing starts during boot.\n");
73 #endif 73 #endif
74 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 74 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
75 pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n"); 75 pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n");
76 #endif 76 #endif
77 #if defined(CONFIG_RCU_CPU_STALL_INFO) 77 #if defined(CONFIG_RCU_CPU_STALL_INFO)
78 pr_info("\tAdditional per-CPU info printed with stalls.\n"); 78 pr_info("\tAdditional per-CPU info printed with stalls.\n");
79 #endif 79 #endif
80 #if NUM_RCU_LVL_4 != 0 80 #if NUM_RCU_LVL_4 != 0
81 pr_info("\tFour-level hierarchy is enabled.\n"); 81 pr_info("\tFour-level hierarchy is enabled.\n");
82 #endif 82 #endif
83 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) 83 if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
84 pr_info("\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 84 pr_info("\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
85 if (nr_cpu_ids != NR_CPUS) 85 if (nr_cpu_ids != NR_CPUS)
86 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 86 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
87 #ifdef CONFIG_RCU_NOCB_CPU 87 #ifdef CONFIG_RCU_NOCB_CPU
88 #ifndef CONFIG_RCU_NOCB_CPU_NONE 88 #ifndef CONFIG_RCU_NOCB_CPU_NONE
89 if (!have_rcu_nocb_mask) { 89 if (!have_rcu_nocb_mask) {
90 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL); 90 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
91 have_rcu_nocb_mask = true; 91 have_rcu_nocb_mask = true;
92 } 92 }
93 #ifdef CONFIG_RCU_NOCB_CPU_ZERO 93 #ifdef CONFIG_RCU_NOCB_CPU_ZERO
94 pr_info("\tExperimental no-CBs CPU 0\n"); 94 pr_info("\tExperimental no-CBs CPU 0\n");
95 cpumask_set_cpu(0, rcu_nocb_mask); 95 cpumask_set_cpu(0, rcu_nocb_mask);
96 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ 96 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
97 #ifdef CONFIG_RCU_NOCB_CPU_ALL 97 #ifdef CONFIG_RCU_NOCB_CPU_ALL
98 pr_info("\tExperimental no-CBs for all CPUs\n"); 98 pr_info("\tExperimental no-CBs for all CPUs\n");
99 cpumask_setall(rcu_nocb_mask); 99 cpumask_setall(rcu_nocb_mask);
100 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ 100 #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
101 #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ 101 #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
102 if (have_rcu_nocb_mask) { 102 if (have_rcu_nocb_mask) {
103 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); 103 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
104 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); 104 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
105 if (rcu_nocb_poll) 105 if (rcu_nocb_poll)
106 pr_info("\tExperimental polled no-CBs CPUs.\n"); 106 pr_info("\tExperimental polled no-CBs CPUs.\n");
107 } 107 }
108 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 108 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
109 } 109 }
110 110
111 #ifdef CONFIG_TREE_PREEMPT_RCU 111 #ifdef CONFIG_TREE_PREEMPT_RCU
112 112
113 struct rcu_state rcu_preempt_state = 113 struct rcu_state rcu_preempt_state =
114 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 114 RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
115 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 115 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
116 static struct rcu_state *rcu_state = &rcu_preempt_state; 116 static struct rcu_state *rcu_state = &rcu_preempt_state;
117 117
118 static int rcu_preempted_readers_exp(struct rcu_node *rnp); 118 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
119 119
120 /* 120 /*
121 * Tell them what RCU they are running. 121 * Tell them what RCU they are running.
122 */ 122 */
123 static void __init rcu_bootup_announce(void) 123 static void __init rcu_bootup_announce(void)
124 { 124 {
125 pr_info("Preemptible hierarchical RCU implementation.\n"); 125 pr_info("Preemptible hierarchical RCU implementation.\n");
126 rcu_bootup_announce_oddness(); 126 rcu_bootup_announce_oddness();
127 } 127 }
128 128
129 /* 129 /*
130 * Return the number of RCU-preempt batches processed thus far 130 * Return the number of RCU-preempt batches processed thus far
131 * for debug and statistics. 131 * for debug and statistics.
132 */ 132 */
133 long rcu_batches_completed_preempt(void) 133 long rcu_batches_completed_preempt(void)
134 { 134 {
135 return rcu_preempt_state.completed; 135 return rcu_preempt_state.completed;
136 } 136 }
137 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); 137 EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
138 138
139 /* 139 /*
140 * Return the number of RCU batches processed thus far for debug & stats. 140 * Return the number of RCU batches processed thus far for debug & stats.
141 */ 141 */
142 long rcu_batches_completed(void) 142 long rcu_batches_completed(void)
143 { 143 {
144 return rcu_batches_completed_preempt(); 144 return rcu_batches_completed_preempt();
145 } 145 }
146 EXPORT_SYMBOL_GPL(rcu_batches_completed); 146 EXPORT_SYMBOL_GPL(rcu_batches_completed);
147 147
148 /* 148 /*
149 * Force a quiescent state for preemptible RCU. 149 * Force a quiescent state for preemptible RCU.
150 */ 150 */
151 void rcu_force_quiescent_state(void) 151 void rcu_force_quiescent_state(void)
152 { 152 {
153 force_quiescent_state(&rcu_preempt_state); 153 force_quiescent_state(&rcu_preempt_state);
154 } 154 }
155 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 155 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
156 156
157 /* 157 /*
158 * Record a preemptible-RCU quiescent state for the specified CPU. Note 158 * Record a preemptible-RCU quiescent state for the specified CPU. Note
159 * that this does not necessarily mean that the task currently running 159 * that this does not necessarily mean that the task currently running
160 * on the CPU is in a quiescent state: there might be any number of 160 * on the CPU is in a quiescent state: there might be any number of
161 * tasks blocked while in an RCU read-side critical section. 161 * tasks blocked while in an RCU read-side critical section.
162 * 162 *
163 * Unlike the other rcu_*_qs() functions, callers to this function 163 * Unlike the other rcu_*_qs() functions, callers to this function
164 * must disable irqs in order to protect the assignment to 164 * must disable irqs in order to protect the assignment to
165 * ->rcu_read_unlock_special. 165 * ->rcu_read_unlock_special.
166 */ 166 */
167 static void rcu_preempt_qs(int cpu) 167 static void rcu_preempt_qs(int cpu)
168 { 168 {
169 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 169 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
170 170
171 if (rdp->passed_quiesce == 0) 171 if (rdp->passed_quiesce == 0)
172 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); 172 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
173 rdp->passed_quiesce = 1; 173 rdp->passed_quiesce = 1;
174 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 174 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
175 } 175 }
176 176
177 /* 177 /*
178 * We have entered the scheduler, and the current task might soon be 178 * We have entered the scheduler, and the current task might soon be
179 * context-switched away from. If this task is in an RCU read-side 179 * context-switched away from. If this task is in an RCU read-side
180 * critical section, we will no longer be able to rely on the CPU to 180 * critical section, we will no longer be able to rely on the CPU to
181 * record that fact, so we enqueue the task on the blkd_tasks list. 181 * record that fact, so we enqueue the task on the blkd_tasks list.
182 * The task will dequeue itself when it exits the outermost enclosing 182 * The task will dequeue itself when it exits the outermost enclosing
183 * RCU read-side critical section. Therefore, the current grace period 183 * RCU read-side critical section. Therefore, the current grace period
184 * cannot be permitted to complete until the blkd_tasks list entries 184 * cannot be permitted to complete until the blkd_tasks list entries
185 * predating the current grace period drain, in other words, until 185 * predating the current grace period drain, in other words, until
186 * rnp->gp_tasks becomes NULL. 186 * rnp->gp_tasks becomes NULL.
187 * 187 *
188 * Caller must disable preemption. 188 * Caller must disable preemption.
189 */ 189 */
190 static void rcu_preempt_note_context_switch(int cpu) 190 static void rcu_preempt_note_context_switch(int cpu)
191 { 191 {
192 struct task_struct *t = current; 192 struct task_struct *t = current;
193 unsigned long flags; 193 unsigned long flags;
194 struct rcu_data *rdp; 194 struct rcu_data *rdp;
195 struct rcu_node *rnp; 195 struct rcu_node *rnp;
196 196
197 if (t->rcu_read_lock_nesting > 0 && 197 if (t->rcu_read_lock_nesting > 0 &&
198 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 198 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
199 199
200 /* Possibly blocking in an RCU read-side critical section. */ 200 /* Possibly blocking in an RCU read-side critical section. */
201 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 201 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
202 rnp = rdp->mynode; 202 rnp = rdp->mynode;
203 raw_spin_lock_irqsave(&rnp->lock, flags); 203 raw_spin_lock_irqsave(&rnp->lock, flags);
204 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 204 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
205 t->rcu_blocked_node = rnp; 205 t->rcu_blocked_node = rnp;
206 206
207 /* 207 /*
208 * If this CPU has already checked in, then this task 208 * If this CPU has already checked in, then this task
209 * will hold up the next grace period rather than the 209 * will hold up the next grace period rather than the
210 * current grace period. Queue the task accordingly. 210 * current grace period. Queue the task accordingly.
211 * If the task is queued for the current grace period 211 * If the task is queued for the current grace period
212 * (i.e., this CPU has not yet passed through a quiescent 212 * (i.e., this CPU has not yet passed through a quiescent
213 * state for the current grace period), then as long 213 * state for the current grace period), then as long
214 * as that task remains queued, the current grace period 214 * as that task remains queued, the current grace period
215 * cannot end. Note that there is some uncertainty as 215 * cannot end. Note that there is some uncertainty as
216 * to exactly when the current grace period started. 216 * to exactly when the current grace period started.
217 * We take a conservative approach, which can result 217 * We take a conservative approach, which can result
218 * in unnecessarily waiting on tasks that started very 218 * in unnecessarily waiting on tasks that started very
219 * slightly after the current grace period began. C'est 219 * slightly after the current grace period began. C'est
220 * la vie!!! 220 * la vie!!!
221 * 221 *
222 * But first, note that the current CPU must still be 222 * But first, note that the current CPU must still be
223 * on line! 223 * on line!
224 */ 224 */
225 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); 225 WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
226 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 226 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
227 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { 227 if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
228 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); 228 list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
229 rnp->gp_tasks = &t->rcu_node_entry; 229 rnp->gp_tasks = &t->rcu_node_entry;
230 #ifdef CONFIG_RCU_BOOST 230 #ifdef CONFIG_RCU_BOOST
231 if (rnp->boost_tasks != NULL) 231 if (rnp->boost_tasks != NULL)
232 rnp->boost_tasks = rnp->gp_tasks; 232 rnp->boost_tasks = rnp->gp_tasks;
233 #endif /* #ifdef CONFIG_RCU_BOOST */ 233 #endif /* #ifdef CONFIG_RCU_BOOST */
234 } else { 234 } else {
235 list_add(&t->rcu_node_entry, &rnp->blkd_tasks); 235 list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
236 if (rnp->qsmask & rdp->grpmask) 236 if (rnp->qsmask & rdp->grpmask)
237 rnp->gp_tasks = &t->rcu_node_entry; 237 rnp->gp_tasks = &t->rcu_node_entry;
238 } 238 }
239 trace_rcu_preempt_task(rdp->rsp->name, 239 trace_rcu_preempt_task(rdp->rsp->name,
240 t->pid, 240 t->pid,
241 (rnp->qsmask & rdp->grpmask) 241 (rnp->qsmask & rdp->grpmask)
242 ? rnp->gpnum 242 ? rnp->gpnum
243 : rnp->gpnum + 1); 243 : rnp->gpnum + 1);
244 raw_spin_unlock_irqrestore(&rnp->lock, flags); 244 raw_spin_unlock_irqrestore(&rnp->lock, flags);
245 } else if (t->rcu_read_lock_nesting < 0 && 245 } else if (t->rcu_read_lock_nesting < 0 &&
246 t->rcu_read_unlock_special) { 246 t->rcu_read_unlock_special) {
247 247
248 /* 248 /*
249 * Complete exit from RCU read-side critical section on 249 * Complete exit from RCU read-side critical section on
250 * behalf of preempted instance of __rcu_read_unlock(). 250 * behalf of preempted instance of __rcu_read_unlock().
251 */ 251 */
252 rcu_read_unlock_special(t); 252 rcu_read_unlock_special(t);
253 } 253 }
254 254
255 /* 255 /*
256 * Either we were not in an RCU read-side critical section to 256 * Either we were not in an RCU read-side critical section to
257 * begin with, or we have now recorded that critical section 257 * begin with, or we have now recorded that critical section
258 * globally. Either way, we can now note a quiescent state 258 * globally. Either way, we can now note a quiescent state
259 * for this CPU. Again, if we were in an RCU read-side critical 259 * for this CPU. Again, if we were in an RCU read-side critical
260 * section, and if that critical section was blocking the current 260 * section, and if that critical section was blocking the current
261 * grace period, then the fact that the task has been enqueued 261 * grace period, then the fact that the task has been enqueued
262 * means that we continue to block the current grace period. 262 * means that we continue to block the current grace period.
263 */ 263 */
264 local_irq_save(flags); 264 local_irq_save(flags);
265 rcu_preempt_qs(cpu); 265 rcu_preempt_qs(cpu);
266 local_irq_restore(flags); 266 local_irq_restore(flags);
267 } 267 }
268 268
269 /* 269 /*
270 * Check for preempted RCU readers blocking the current grace period 270 * Check for preempted RCU readers blocking the current grace period
271 * for the specified rcu_node structure. If the caller needs a reliable 271 * for the specified rcu_node structure. If the caller needs a reliable
272 * answer, it must hold the rcu_node's ->lock. 272 * answer, it must hold the rcu_node's ->lock.
273 */ 273 */
274 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 274 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
275 { 275 {
276 return rnp->gp_tasks != NULL; 276 return rnp->gp_tasks != NULL;
277 } 277 }
278 278
279 /* 279 /*
280 * Record a quiescent state for all tasks that were previously queued 280 * Record a quiescent state for all tasks that were previously queued
281 * on the specified rcu_node structure and that were blocking the current 281 * on the specified rcu_node structure and that were blocking the current
282 * RCU grace period. The caller must hold the specified rnp->lock with 282 * RCU grace period. The caller must hold the specified rnp->lock with
283 * irqs disabled, and this lock is released upon return, but irqs remain 283 * irqs disabled, and this lock is released upon return, but irqs remain
284 * disabled. 284 * disabled.
285 */ 285 */
286 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 286 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
287 __releases(rnp->lock) 287 __releases(rnp->lock)
288 { 288 {
289 unsigned long mask; 289 unsigned long mask;
290 struct rcu_node *rnp_p; 290 struct rcu_node *rnp_p;
291 291
292 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 292 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
293 raw_spin_unlock_irqrestore(&rnp->lock, flags); 293 raw_spin_unlock_irqrestore(&rnp->lock, flags);
294 return; /* Still need more quiescent states! */ 294 return; /* Still need more quiescent states! */
295 } 295 }
296 296
297 rnp_p = rnp->parent; 297 rnp_p = rnp->parent;
298 if (rnp_p == NULL) { 298 if (rnp_p == NULL) {
299 /* 299 /*
300 * Either there is only one rcu_node in the tree, 300 * Either there is only one rcu_node in the tree,
301 * or tasks were kicked up to root rcu_node due to 301 * or tasks were kicked up to root rcu_node due to
302 * CPUs going offline. 302 * CPUs going offline.
303 */ 303 */
304 rcu_report_qs_rsp(&rcu_preempt_state, flags); 304 rcu_report_qs_rsp(&rcu_preempt_state, flags);
305 return; 305 return;
306 } 306 }
307 307
308 /* Report up the rest of the hierarchy. */ 308 /* Report up the rest of the hierarchy. */
309 mask = rnp->grpmask; 309 mask = rnp->grpmask;
310 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 310 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
311 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ 311 raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
312 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); 312 rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
313 } 313 }
314 314
315 /* 315 /*
316 * Advance a ->blkd_tasks-list pointer to the next entry, instead 316 * Advance a ->blkd_tasks-list pointer to the next entry, instead
317 * returning NULL if at the end of the list. 317 * returning NULL if at the end of the list.
318 */ 318 */
319 static struct list_head *rcu_next_node_entry(struct task_struct *t, 319 static struct list_head *rcu_next_node_entry(struct task_struct *t,
320 struct rcu_node *rnp) 320 struct rcu_node *rnp)
321 { 321 {
322 struct list_head *np; 322 struct list_head *np;
323 323
324 np = t->rcu_node_entry.next; 324 np = t->rcu_node_entry.next;
325 if (np == &rnp->blkd_tasks) 325 if (np == &rnp->blkd_tasks)
326 np = NULL; 326 np = NULL;
327 return np; 327 return np;
328 } 328 }
329 329
330 /* 330 /*
331 * Handle special cases during rcu_read_unlock(), such as needing to 331 * Handle special cases during rcu_read_unlock(), such as needing to
332 * notify RCU core processing or the task having blocked during the RCU 332 * notify RCU core processing or the task having blocked during the RCU
333 * read-side critical section. 333 * read-side critical section.
334 */ 334 */
335 void rcu_read_unlock_special(struct task_struct *t) 335 void rcu_read_unlock_special(struct task_struct *t)
336 { 336 {
337 int empty; 337 int empty;
338 int empty_exp; 338 int empty_exp;
339 int empty_exp_now; 339 int empty_exp_now;
340 unsigned long flags; 340 unsigned long flags;
341 struct list_head *np; 341 struct list_head *np;
342 #ifdef CONFIG_RCU_BOOST 342 #ifdef CONFIG_RCU_BOOST
343 struct rt_mutex *rbmp = NULL; 343 struct rt_mutex *rbmp = NULL;
344 #endif /* #ifdef CONFIG_RCU_BOOST */ 344 #endif /* #ifdef CONFIG_RCU_BOOST */
345 struct rcu_node *rnp; 345 struct rcu_node *rnp;
346 int special; 346 int special;
347 347
348 /* NMI handlers cannot block and cannot safely manipulate state. */ 348 /* NMI handlers cannot block and cannot safely manipulate state. */
349 if (in_nmi()) 349 if (in_nmi())
350 return; 350 return;
351 351
352 local_irq_save(flags); 352 local_irq_save(flags);
353 353
354 /* 354 /*
355 * If RCU core is waiting for this CPU to exit critical section, 355 * If RCU core is waiting for this CPU to exit critical section,
356 * let it know that we have done so. 356 * let it know that we have done so.
357 */ 357 */
358 special = t->rcu_read_unlock_special; 358 special = t->rcu_read_unlock_special;
359 if (special & RCU_READ_UNLOCK_NEED_QS) { 359 if (special & RCU_READ_UNLOCK_NEED_QS) {
360 rcu_preempt_qs(smp_processor_id()); 360 rcu_preempt_qs(smp_processor_id());
361 } 361 }
362 362
363 /* Hardware IRQ handlers cannot block. */ 363 /* Hardware IRQ handlers cannot block. */
364 if (in_irq() || in_serving_softirq()) { 364 if (in_irq() || in_serving_softirq()) {
365 local_irq_restore(flags); 365 local_irq_restore(flags);
366 return; 366 return;
367 } 367 }
368 368
369 /* Clean up if blocked during RCU read-side critical section. */ 369 /* Clean up if blocked during RCU read-side critical section. */
370 if (special & RCU_READ_UNLOCK_BLOCKED) { 370 if (special & RCU_READ_UNLOCK_BLOCKED) {
371 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 371 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
372 372
373 /* 373 /*
374 * Remove this task from the list it blocked on. The 374 * Remove this task from the list it blocked on. The
375 * task can migrate while we acquire the lock, but at 375 * task can migrate while we acquire the lock, but at
376 * most one time. So at most two passes through loop. 376 * most one time. So at most two passes through loop.
377 */ 377 */
378 for (;;) { 378 for (;;) {
379 rnp = t->rcu_blocked_node; 379 rnp = t->rcu_blocked_node;
380 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 380 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
381 if (rnp == t->rcu_blocked_node) 381 if (rnp == t->rcu_blocked_node)
382 break; 382 break;
383 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 383 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
384 } 384 }
385 empty = !rcu_preempt_blocked_readers_cgp(rnp); 385 empty = !rcu_preempt_blocked_readers_cgp(rnp);
386 empty_exp = !rcu_preempted_readers_exp(rnp); 386 empty_exp = !rcu_preempted_readers_exp(rnp);
387 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 387 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
388 np = rcu_next_node_entry(t, rnp); 388 np = rcu_next_node_entry(t, rnp);
389 list_del_init(&t->rcu_node_entry); 389 list_del_init(&t->rcu_node_entry);
390 t->rcu_blocked_node = NULL; 390 t->rcu_blocked_node = NULL;
391 trace_rcu_unlock_preempted_task("rcu_preempt", 391 trace_rcu_unlock_preempted_task("rcu_preempt",
392 rnp->gpnum, t->pid); 392 rnp->gpnum, t->pid);
393 if (&t->rcu_node_entry == rnp->gp_tasks) 393 if (&t->rcu_node_entry == rnp->gp_tasks)
394 rnp->gp_tasks = np; 394 rnp->gp_tasks = np;
395 if (&t->rcu_node_entry == rnp->exp_tasks) 395 if (&t->rcu_node_entry == rnp->exp_tasks)
396 rnp->exp_tasks = np; 396 rnp->exp_tasks = np;
397 #ifdef CONFIG_RCU_BOOST 397 #ifdef CONFIG_RCU_BOOST
398 if (&t->rcu_node_entry == rnp->boost_tasks) 398 if (&t->rcu_node_entry == rnp->boost_tasks)
399 rnp->boost_tasks = np; 399 rnp->boost_tasks = np;
400 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ 400 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
401 if (t->rcu_boost_mutex) { 401 if (t->rcu_boost_mutex) {
402 rbmp = t->rcu_boost_mutex; 402 rbmp = t->rcu_boost_mutex;
403 t->rcu_boost_mutex = NULL; 403 t->rcu_boost_mutex = NULL;
404 } 404 }
405 #endif /* #ifdef CONFIG_RCU_BOOST */ 405 #endif /* #ifdef CONFIG_RCU_BOOST */
406 406
407 /* 407 /*
408 * If this was the last task on the current list, and if 408 * If this was the last task on the current list, and if
409 * we aren't waiting on any CPUs, report the quiescent state. 409 * we aren't waiting on any CPUs, report the quiescent state.
410 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, 410 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
411 * so we must take a snapshot of the expedited state. 411 * so we must take a snapshot of the expedited state.
412 */ 412 */
413 empty_exp_now = !rcu_preempted_readers_exp(rnp); 413 empty_exp_now = !rcu_preempted_readers_exp(rnp);
414 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { 414 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
415 trace_rcu_quiescent_state_report("preempt_rcu", 415 trace_rcu_quiescent_state_report("preempt_rcu",
416 rnp->gpnum, 416 rnp->gpnum,
417 0, rnp->qsmask, 417 0, rnp->qsmask,
418 rnp->level, 418 rnp->level,
419 rnp->grplo, 419 rnp->grplo,
420 rnp->grphi, 420 rnp->grphi,
421 !!rnp->gp_tasks); 421 !!rnp->gp_tasks);
422 rcu_report_unblock_qs_rnp(rnp, flags); 422 rcu_report_unblock_qs_rnp(rnp, flags);
423 } else { 423 } else {
424 raw_spin_unlock_irqrestore(&rnp->lock, flags); 424 raw_spin_unlock_irqrestore(&rnp->lock, flags);
425 } 425 }
426 426
427 #ifdef CONFIG_RCU_BOOST 427 #ifdef CONFIG_RCU_BOOST
428 /* Unboost if we were boosted. */ 428 /* Unboost if we were boosted. */
429 if (rbmp) 429 if (rbmp)
430 rt_mutex_unlock(rbmp); 430 rt_mutex_unlock(rbmp);
431 #endif /* #ifdef CONFIG_RCU_BOOST */ 431 #endif /* #ifdef CONFIG_RCU_BOOST */
432 432
433 /* 433 /*
434 * If this was the last task on the expedited lists, 434 * If this was the last task on the expedited lists,
435 * then we need to report up the rcu_node hierarchy. 435 * then we need to report up the rcu_node hierarchy.
436 */ 436 */
437 if (!empty_exp && empty_exp_now) 437 if (!empty_exp && empty_exp_now)
438 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); 438 rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
439 } else { 439 } else {
440 local_irq_restore(flags); 440 local_irq_restore(flags);
441 } 441 }
442 } 442 }
443 443
444 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE 444 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
445 445
446 /* 446 /*
447 * Dump detailed information for all tasks blocking the current RCU 447 * Dump detailed information for all tasks blocking the current RCU
448 * grace period on the specified rcu_node structure. 448 * grace period on the specified rcu_node structure.
449 */ 449 */
450 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) 450 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
451 { 451 {
452 unsigned long flags; 452 unsigned long flags;
453 struct task_struct *t; 453 struct task_struct *t;
454 454
455 raw_spin_lock_irqsave(&rnp->lock, flags); 455 raw_spin_lock_irqsave(&rnp->lock, flags);
456 if (!rcu_preempt_blocked_readers_cgp(rnp)) { 456 if (!rcu_preempt_blocked_readers_cgp(rnp)) {
457 raw_spin_unlock_irqrestore(&rnp->lock, flags); 457 raw_spin_unlock_irqrestore(&rnp->lock, flags);
458 return; 458 return;
459 } 459 }
460 t = list_entry(rnp->gp_tasks, 460 t = list_entry(rnp->gp_tasks,
461 struct task_struct, rcu_node_entry); 461 struct task_struct, rcu_node_entry);
462 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 462 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
463 sched_show_task(t); 463 sched_show_task(t);
464 raw_spin_unlock_irqrestore(&rnp->lock, flags); 464 raw_spin_unlock_irqrestore(&rnp->lock, flags);
465 } 465 }
466 466
467 /* 467 /*
468 * Dump detailed information for all tasks blocking the current RCU 468 * Dump detailed information for all tasks blocking the current RCU
469 * grace period. 469 * grace period.
470 */ 470 */
471 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 471 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
472 { 472 {
473 struct rcu_node *rnp = rcu_get_root(rsp); 473 struct rcu_node *rnp = rcu_get_root(rsp);
474 474
475 rcu_print_detail_task_stall_rnp(rnp); 475 rcu_print_detail_task_stall_rnp(rnp);
476 rcu_for_each_leaf_node(rsp, rnp) 476 rcu_for_each_leaf_node(rsp, rnp)
477 rcu_print_detail_task_stall_rnp(rnp); 477 rcu_print_detail_task_stall_rnp(rnp);
478 } 478 }
479 479
480 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 480 #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
481 481
482 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 482 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
483 { 483 {
484 } 484 }
485 485
486 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 486 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
487 487
488 #ifdef CONFIG_RCU_CPU_STALL_INFO 488 #ifdef CONFIG_RCU_CPU_STALL_INFO
489 489
490 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 490 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
491 { 491 {
492 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):", 492 pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
493 rnp->level, rnp->grplo, rnp->grphi); 493 rnp->level, rnp->grplo, rnp->grphi);
494 } 494 }
495 495
496 static void rcu_print_task_stall_end(void) 496 static void rcu_print_task_stall_end(void)
497 { 497 {
498 pr_cont("\n"); 498 pr_cont("\n");
499 } 499 }
500 500
501 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 501 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
502 502
503 static void rcu_print_task_stall_begin(struct rcu_node *rnp) 503 static void rcu_print_task_stall_begin(struct rcu_node *rnp)
504 { 504 {
505 } 505 }
506 506
507 static void rcu_print_task_stall_end(void) 507 static void rcu_print_task_stall_end(void)
508 { 508 {
509 } 509 }
510 510
511 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 511 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
512 512
513 /* 513 /*
514 * Scan the current list of tasks blocked within RCU read-side critical 514 * Scan the current list of tasks blocked within RCU read-side critical
515 * sections, printing out the tid of each. 515 * sections, printing out the tid of each.
516 */ 516 */
517 static int rcu_print_task_stall(struct rcu_node *rnp) 517 static int rcu_print_task_stall(struct rcu_node *rnp)
518 { 518 {
519 struct task_struct *t; 519 struct task_struct *t;
520 int ndetected = 0; 520 int ndetected = 0;
521 521
522 if (!rcu_preempt_blocked_readers_cgp(rnp)) 522 if (!rcu_preempt_blocked_readers_cgp(rnp))
523 return 0; 523 return 0;
524 rcu_print_task_stall_begin(rnp); 524 rcu_print_task_stall_begin(rnp);
525 t = list_entry(rnp->gp_tasks, 525 t = list_entry(rnp->gp_tasks,
526 struct task_struct, rcu_node_entry); 526 struct task_struct, rcu_node_entry);
527 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 527 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
528 pr_cont(" P%d", t->pid); 528 pr_cont(" P%d", t->pid);
529 ndetected++; 529 ndetected++;
530 } 530 }
531 rcu_print_task_stall_end(); 531 rcu_print_task_stall_end();
532 return ndetected; 532 return ndetected;
533 } 533 }
534 534
535 /* 535 /*
536 * Check that the list of blocked tasks for the newly completed grace 536 * Check that the list of blocked tasks for the newly completed grace
537 * period is in fact empty. It is a serious bug to complete a grace 537 * period is in fact empty. It is a serious bug to complete a grace
538 * period that still has RCU readers blocked! This function must be 538 * period that still has RCU readers blocked! This function must be
539 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock 539 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
540 * must be held by the caller. 540 * must be held by the caller.
541 * 541 *
542 * Also, if there are blocked tasks on the list, they automatically 542 * Also, if there are blocked tasks on the list, they automatically
543 * block the newly created grace period, so set up ->gp_tasks accordingly. 543 * block the newly created grace period, so set up ->gp_tasks accordingly.
544 */ 544 */
545 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 545 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
546 { 546 {
547 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 547 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
548 if (!list_empty(&rnp->blkd_tasks)) 548 if (!list_empty(&rnp->blkd_tasks))
549 rnp->gp_tasks = rnp->blkd_tasks.next; 549 rnp->gp_tasks = rnp->blkd_tasks.next;
550 WARN_ON_ONCE(rnp->qsmask); 550 WARN_ON_ONCE(rnp->qsmask);
551 } 551 }
552 552
553 #ifdef CONFIG_HOTPLUG_CPU 553 #ifdef CONFIG_HOTPLUG_CPU
554 554
555 /* 555 /*
556 * Handle tasklist migration for case in which all CPUs covered by the 556 * Handle tasklist migration for case in which all CPUs covered by the
557 * specified rcu_node have gone offline. Move them up to the root 557 * specified rcu_node have gone offline. Move them up to the root
558 * rcu_node. The reason for not just moving them to the immediate 558 * rcu_node. The reason for not just moving them to the immediate
559 * parent is to remove the need for rcu_read_unlock_special() to 559 * parent is to remove the need for rcu_read_unlock_special() to
560 * make more than two attempts to acquire the target rcu_node's lock. 560 * make more than two attempts to acquire the target rcu_node's lock.
561 * Returns a value indicating whether tasks were blocking the current 561 * Returns a value indicating whether tasks were blocking the current
562 * RCU grace period. 562 * RCU grace period.
563 * 563 *
564 * The return value distinguishes tasks blocking the normal grace period 564 * The return value distinguishes tasks blocking the normal grace period
565 * from those blocking the expedited grace period on this rcu_node. 565 * from those blocking the expedited grace period on this rcu_node.
566 * 566 *
567 * The caller must hold rnp->lock with irqs disabled. 567 * The caller must hold rnp->lock with irqs disabled.
568 */ 568 */
569 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 569 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
570 struct rcu_node *rnp, 570 struct rcu_node *rnp,
571 struct rcu_data *rdp) 571 struct rcu_data *rdp)
572 { 572 {
573 struct list_head *lp; 573 struct list_head *lp;
574 struct list_head *lp_root; 574 struct list_head *lp_root;
575 int retval = 0; 575 int retval = 0;
576 struct rcu_node *rnp_root = rcu_get_root(rsp); 576 struct rcu_node *rnp_root = rcu_get_root(rsp);
577 struct task_struct *t; 577 struct task_struct *t;
578 578
579 if (rnp == rnp_root) { 579 if (rnp == rnp_root) {
580 WARN_ONCE(1, "Last CPU thought to be offlined?"); 580 WARN_ONCE(1, "Last CPU thought to be offlined?");
581 return 0; /* Shouldn't happen: at least one CPU online. */ 581 return 0; /* Shouldn't happen: at least one CPU online. */
582 } 582 }
583 583
584 /* If we are on an internal node, complain bitterly. */ 584 /* If we are on an internal node, complain bitterly. */
585 WARN_ON_ONCE(rnp != rdp->mynode); 585 WARN_ON_ONCE(rnp != rdp->mynode);
586 586
587 /* 587 /*
588 * Move tasks up to root rcu_node. Don't try to get fancy for 588 * Move tasks up to root rcu_node. Don't try to get fancy for
589 * this corner-case operation -- just put this node's tasks 589 * this corner-case operation -- just put this node's tasks
590 * at the head of the root node's list, and update the root node's 590 * at the head of the root node's list, and update the root node's
591 * ->gp_tasks and ->exp_tasks pointers to those of this node's, 591 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
592 * if non-NULL. This might result in waiting for more tasks than 592 * if non-NULL. This might result in waiting for more tasks than
593 * absolutely necessary, but this is a good performance/complexity 593 * absolutely necessary, but this is a good performance/complexity
594 * tradeoff. 594 * tradeoff.
595 */ 595 */
596 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) 596 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
597 retval |= RCU_OFL_TASKS_NORM_GP; 597 retval |= RCU_OFL_TASKS_NORM_GP;
598 if (rcu_preempted_readers_exp(rnp)) 598 if (rcu_preempted_readers_exp(rnp))
599 retval |= RCU_OFL_TASKS_EXP_GP; 599 retval |= RCU_OFL_TASKS_EXP_GP;
600 lp = &rnp->blkd_tasks; 600 lp = &rnp->blkd_tasks;
601 lp_root = &rnp_root->blkd_tasks; 601 lp_root = &rnp_root->blkd_tasks;
602 while (!list_empty(lp)) { 602 while (!list_empty(lp)) {
603 t = list_entry(lp->next, typeof(*t), rcu_node_entry); 603 t = list_entry(lp->next, typeof(*t), rcu_node_entry);
604 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 604 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
605 list_del(&t->rcu_node_entry); 605 list_del(&t->rcu_node_entry);
606 t->rcu_blocked_node = rnp_root; 606 t->rcu_blocked_node = rnp_root;
607 list_add(&t->rcu_node_entry, lp_root); 607 list_add(&t->rcu_node_entry, lp_root);
608 if (&t->rcu_node_entry == rnp->gp_tasks) 608 if (&t->rcu_node_entry == rnp->gp_tasks)
609 rnp_root->gp_tasks = rnp->gp_tasks; 609 rnp_root->gp_tasks = rnp->gp_tasks;
610 if (&t->rcu_node_entry == rnp->exp_tasks) 610 if (&t->rcu_node_entry == rnp->exp_tasks)
611 rnp_root->exp_tasks = rnp->exp_tasks; 611 rnp_root->exp_tasks = rnp->exp_tasks;
612 #ifdef CONFIG_RCU_BOOST 612 #ifdef CONFIG_RCU_BOOST
613 if (&t->rcu_node_entry == rnp->boost_tasks) 613 if (&t->rcu_node_entry == rnp->boost_tasks)
614 rnp_root->boost_tasks = rnp->boost_tasks; 614 rnp_root->boost_tasks = rnp->boost_tasks;
615 #endif /* #ifdef CONFIG_RCU_BOOST */ 615 #endif /* #ifdef CONFIG_RCU_BOOST */
616 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 616 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
617 } 617 }
618 618
619 rnp->gp_tasks = NULL; 619 rnp->gp_tasks = NULL;
620 rnp->exp_tasks = NULL; 620 rnp->exp_tasks = NULL;
621 #ifdef CONFIG_RCU_BOOST 621 #ifdef CONFIG_RCU_BOOST
622 rnp->boost_tasks = NULL; 622 rnp->boost_tasks = NULL;
623 /* 623 /*
624 * In case the root is being boosted and the leaf was not, make sure 624 * In case the root is being boosted and the leaf was not, make sure
625 * that we boost the tasks blocking the current grace period 625 * that we boost the tasks blocking the current grace period
626 * in this case. 626 * in this case.
627 */ 627 */
628 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ 628 raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
629 if (rnp_root->boost_tasks != NULL && 629 if (rnp_root->boost_tasks != NULL &&
630 rnp_root->boost_tasks != rnp_root->gp_tasks && 630 rnp_root->boost_tasks != rnp_root->gp_tasks &&
631 rnp_root->boost_tasks != rnp_root->exp_tasks) 631 rnp_root->boost_tasks != rnp_root->exp_tasks)
632 rnp_root->boost_tasks = rnp_root->gp_tasks; 632 rnp_root->boost_tasks = rnp_root->gp_tasks;
633 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ 633 raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
634 #endif /* #ifdef CONFIG_RCU_BOOST */ 634 #endif /* #ifdef CONFIG_RCU_BOOST */
635 635
636 return retval; 636 return retval;
637 } 637 }
638 638
639 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 639 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
640 640
641 /* 641 /*
642 * Check for a quiescent state from the current CPU. When a task blocks, 642 * Check for a quiescent state from the current CPU. When a task blocks,
643 * the task is recorded in the corresponding CPU's rcu_node structure, 643 * the task is recorded in the corresponding CPU's rcu_node structure,
644 * which is checked elsewhere. 644 * which is checked elsewhere.
645 * 645 *
646 * Caller must disable hard irqs. 646 * Caller must disable hard irqs.
647 */ 647 */
648 static void rcu_preempt_check_callbacks(int cpu) 648 static void rcu_preempt_check_callbacks(int cpu)
649 { 649 {
650 struct task_struct *t = current; 650 struct task_struct *t = current;
651 651
652 if (t->rcu_read_lock_nesting == 0) { 652 if (t->rcu_read_lock_nesting == 0) {
653 rcu_preempt_qs(cpu); 653 rcu_preempt_qs(cpu);
654 return; 654 return;
655 } 655 }
656 if (t->rcu_read_lock_nesting > 0 && 656 if (t->rcu_read_lock_nesting > 0 &&
657 per_cpu(rcu_preempt_data, cpu).qs_pending) 657 per_cpu(rcu_preempt_data, cpu).qs_pending)
658 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 658 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
659 } 659 }
660 660
661 #ifdef CONFIG_RCU_BOOST 661 #ifdef CONFIG_RCU_BOOST
662 662
663 static void rcu_preempt_do_callbacks(void) 663 static void rcu_preempt_do_callbacks(void)
664 { 664 {
665 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); 665 rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
666 } 666 }
667 667
668 #endif /* #ifdef CONFIG_RCU_BOOST */ 668 #endif /* #ifdef CONFIG_RCU_BOOST */
669 669
670 /* 670 /*
671 * Queue a preemptible-RCU callback for invocation after a grace period. 671 * Queue a preemptible-RCU callback for invocation after a grace period.
672 */ 672 */
673 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 673 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
674 { 674 {
675 __call_rcu(head, func, &rcu_preempt_state, -1, 0); 675 __call_rcu(head, func, &rcu_preempt_state, -1, 0);
676 } 676 }
677 EXPORT_SYMBOL_GPL(call_rcu); 677 EXPORT_SYMBOL_GPL(call_rcu);
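
For reference, a minimal sketch of how an updater typically uses call_rcu(); struct foo, foo_reclaim() and foo_remove() are hypothetical names used only for illustration:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int data;
            struct rcu_head rcu;    /* embedded so call_rcu() can queue this object */
    };

    static void foo_reclaim(struct rcu_head *rcu)
    {
            /* Runs after a grace period: no pre-existing reader can still see fp. */
            struct foo *fp = container_of(rcu, struct foo, rcu);

            kfree(fp);
    }

    /* Caller has already unpublished fp, e.g. via rcu_assign_pointer(). */
    static void foo_remove(struct foo *fp)
    {
            call_rcu(&fp->rcu, foo_reclaim);
    }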
678 678
679 /* 679 /*
680 * Queue an RCU callback for lazy invocation after a grace period. 680 * Queue an RCU callback for lazy invocation after a grace period.
681 * This will likely be later named something like "call_rcu_lazy()", 681 * This will likely be later named something like "call_rcu_lazy()",
682 * but this change will require some way of tagging the lazy RCU 682 * but this change will require some way of tagging the lazy RCU
683 * callbacks in the list of pending callbacks. Until then, this 683 * callbacks in the list of pending callbacks. Until then, this
684 * function may only be called from __kfree_rcu(). 684 * function may only be called from __kfree_rcu().
685 */ 685 */
686 void kfree_call_rcu(struct rcu_head *head, 686 void kfree_call_rcu(struct rcu_head *head,
687 void (*func)(struct rcu_head *rcu)) 687 void (*func)(struct rcu_head *rcu))
688 { 688 {
689 __call_rcu(head, func, &rcu_preempt_state, -1, 1); 689 __call_rcu(head, func, &rcu_preempt_state, -1, 1);
690 } 690 }
691 EXPORT_SYMBOL_GPL(kfree_call_rcu); 691 EXPORT_SYMBOL_GPL(kfree_call_rcu);
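
Callers are not expected to invoke kfree_call_rcu() directly; the kfree_rcu() wrapper (which expands to __kfree_rcu()) is the intended interface. A hedged sketch, reusing the hypothetical struct foo from the call_rcu() sketch above:

    static void foo_free(struct foo *fp)
    {
            /*
             * Equivalent to call_rcu() with a callback that simply kfree()s fp,
             * but allows the callback to be treated as lazy, as described above.
             */
            kfree_rcu(fp, rcu);     /* second argument names the rcu_head member */
    }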
692 692
693 /** 693 /**
694 * synchronize_rcu - wait until a grace period has elapsed. 694 * synchronize_rcu - wait until a grace period has elapsed.
695 * 695 *
696 * Control will return to the caller some time after a full grace 696 * Control will return to the caller some time after a full grace
697 * period has elapsed, in other words after all currently executing RCU 697 * period has elapsed, in other words after all currently executing RCU
698 * read-side critical sections have completed. Note, however, that 698 * read-side critical sections have completed. Note, however, that
699 * upon return from synchronize_rcu(), the caller might well be executing 699 * upon return from synchronize_rcu(), the caller might well be executing
700 * concurrently with new RCU read-side critical sections that began while 700 * concurrently with new RCU read-side critical sections that began while
701 * synchronize_rcu() was waiting. RCU read-side critical sections are 701 * synchronize_rcu() was waiting. RCU read-side critical sections are
702 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 702 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
703 * 703 *
704 * See the description of synchronize_sched() for more detailed information 704 * See the description of synchronize_sched() for more detailed information
705 * on memory ordering guarantees. 705 * on memory ordering guarantees.
706 */ 706 */
707 void synchronize_rcu(void) 707 void synchronize_rcu(void)
708 { 708 {
709 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && 709 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
710 !lock_is_held(&rcu_lock_map) && 710 !lock_is_held(&rcu_lock_map) &&
711 !lock_is_held(&rcu_sched_lock_map), 711 !lock_is_held(&rcu_sched_lock_map),
712 "Illegal synchronize_rcu() in RCU read-side critical section"); 712 "Illegal synchronize_rcu() in RCU read-side critical section");
713 if (!rcu_scheduler_active) 713 if (!rcu_scheduler_active)
714 return; 714 return;
715 if (rcu_expedited) 715 if (rcu_expedited)
716 synchronize_rcu_expedited(); 716 synchronize_rcu_expedited();
717 else 717 else
718 wait_rcu_gp(call_rcu); 718 wait_rcu_gp(call_rcu);
719 } 719 }
720 EXPORT_SYMBOL_GPL(synchronize_rcu); 720 EXPORT_SYMBOL_GPL(synchronize_rcu);
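
A minimal sketch of the classic publish-then-wait update pattern that relies on synchronize_rcu(); gp, foo_mutex, foo_get_data() and foo_replace() are hypothetical names, and struct foo is the illustrative type from the call_rcu() sketch above:

    #include <linux/mutex.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    static struct foo __rcu *gp;        /* RCU-protected pointer (hypothetical) */
    static DEFINE_MUTEX(foo_mutex);     /* serializes updaters (hypothetical) */

    /* Reader: delimited by rcu_read_lock()/rcu_read_unlock(), may be nested. */
    static int foo_get_data(void)
    {
            struct foo *fp;
            int ret;

            rcu_read_lock();
            fp = rcu_dereference(gp);
            ret = fp ? fp->data : -1;
            rcu_read_unlock();
            return ret;
    }

    /* Updater, called with foo_mutex held. */
    static void foo_replace(struct foo *new_fp)
    {
            struct foo *old_fp;

            old_fp = rcu_dereference_protected(gp, lockdep_is_held(&foo_mutex));
            rcu_assign_pointer(gp, new_fp); /* publish the new version */
            synchronize_rcu();              /* wait for pre-existing readers */
            kfree(old_fp);                  /* no reader can still see old_fp */
    }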
721 721
722 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); 722 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
723 static unsigned long sync_rcu_preempt_exp_count; 723 static unsigned long sync_rcu_preempt_exp_count;
724 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); 724 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
725 725
726 /* 726 /*
727 * Return non-zero if there are any tasks in RCU read-side critical 727 * Return non-zero if there are any tasks in RCU read-side critical
728 * sections blocking the current preemptible-RCU expedited grace period. 728 * sections blocking the current preemptible-RCU expedited grace period.
729 * If there is no preemptible-RCU expedited grace period currently in 729 * If there is no preemptible-RCU expedited grace period currently in
730 * progress, returns zero unconditionally. 730 * progress, returns zero unconditionally.
731 */ 731 */
732 static int rcu_preempted_readers_exp(struct rcu_node *rnp) 732 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
733 { 733 {
734 return rnp->exp_tasks != NULL; 734 return rnp->exp_tasks != NULL;
735 } 735 }
736 736
737 /* 737 /*
738 * Return non-zero if there is no RCU expedited grace period in progress 738 * Return non-zero if there is no RCU expedited grace period in progress
739 * for the specified rcu_node structure, in other words, if all CPUs and 739 * for the specified rcu_node structure, in other words, if all CPUs and
740 * tasks covered by the specified rcu_node structure have done their bit 740 * tasks covered by the specified rcu_node structure have done their bit
741 * for the current expedited grace period. Works only for preemptible 741 * for the current expedited grace period. Works only for preemptible
742 * RCU -- other RCU implementations use other means. 742 * RCU -- other RCU implementations use other means.
743 * 743 *
744 * Caller must hold sync_rcu_preempt_exp_mutex. 744 * Caller must hold sync_rcu_preempt_exp_mutex.
745 */ 745 */
746 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 746 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
747 { 747 {
748 return !rcu_preempted_readers_exp(rnp) && 748 return !rcu_preempted_readers_exp(rnp) &&
749 ACCESS_ONCE(rnp->expmask) == 0; 749 ACCESS_ONCE(rnp->expmask) == 0;
750 } 750 }
751 751
752 /* 752 /*
753 * Report the exit from RCU read-side critical section for the last task 753 * Report the exit from RCU read-side critical section for the last task
754 * that queued itself during or before the current expedited preemptible-RCU 754 * that queued itself during or before the current expedited preemptible-RCU
755 * grace period. This event is reported either to the rcu_node structure on 755 * grace period. This event is reported either to the rcu_node structure on
756 * which the task was queued or to one of that rcu_node structure's ancestors, 756 * which the task was queued or to one of that rcu_node structure's ancestors,
757 * recursively up the tree. (Calm down, calm down, we do the recursion 757 * recursively up the tree. (Calm down, calm down, we do the recursion
758 * iteratively!) 758 * iteratively!)
759 * 759 *
760 * Most callers will set the "wake" flag, but the task initiating the 760 * Most callers will set the "wake" flag, but the task initiating the
761 * expedited grace period need not wake itself. 761 * expedited grace period need not wake itself.
762 * 762 *
763 * Caller must hold sync_rcu_preempt_exp_mutex. 763 * Caller must hold sync_rcu_preempt_exp_mutex.
764 */ 764 */
765 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 765 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
766 bool wake) 766 bool wake)
767 { 767 {
768 unsigned long flags; 768 unsigned long flags;
769 unsigned long mask; 769 unsigned long mask;
770 770
771 raw_spin_lock_irqsave(&rnp->lock, flags); 771 raw_spin_lock_irqsave(&rnp->lock, flags);
772 for (;;) { 772 for (;;) {
773 if (!sync_rcu_preempt_exp_done(rnp)) { 773 if (!sync_rcu_preempt_exp_done(rnp)) {
774 raw_spin_unlock_irqrestore(&rnp->lock, flags); 774 raw_spin_unlock_irqrestore(&rnp->lock, flags);
775 break; 775 break;
776 } 776 }
777 if (rnp->parent == NULL) { 777 if (rnp->parent == NULL) {
778 raw_spin_unlock_irqrestore(&rnp->lock, flags); 778 raw_spin_unlock_irqrestore(&rnp->lock, flags);
779 if (wake) 779 if (wake)
780 wake_up(&sync_rcu_preempt_exp_wq); 780 wake_up(&sync_rcu_preempt_exp_wq);
781 break; 781 break;
782 } 782 }
783 mask = rnp->grpmask; 783 mask = rnp->grpmask;
784 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 784 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
785 rnp = rnp->parent; 785 rnp = rnp->parent;
786 raw_spin_lock(&rnp->lock); /* irqs already disabled */ 786 raw_spin_lock(&rnp->lock); /* irqs already disabled */
787 rnp->expmask &= ~mask; 787 rnp->expmask &= ~mask;
788 } 788 }
789 } 789 }
790 790
791 /* 791 /*
792 * Snapshot the tasks blocking the newly started preemptible-RCU expedited 792 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
793 * grace period for the specified rcu_node structure. If there are no such 793 * grace period for the specified rcu_node structure. If there are no such
794 * tasks, report it up the rcu_node hierarchy. 794 * tasks, report it up the rcu_node hierarchy.
795 * 795 *
796 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude 796 * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
797 * CPU hotplug operations. 797 * CPU hotplug operations.
798 */ 798 */
799 static void 799 static void
800 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) 800 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
801 { 801 {
802 unsigned long flags; 802 unsigned long flags;
803 int must_wait = 0; 803 int must_wait = 0;
804 804
805 raw_spin_lock_irqsave(&rnp->lock, flags); 805 raw_spin_lock_irqsave(&rnp->lock, flags);
806 if (list_empty(&rnp->blkd_tasks)) { 806 if (list_empty(&rnp->blkd_tasks)) {
807 raw_spin_unlock_irqrestore(&rnp->lock, flags); 807 raw_spin_unlock_irqrestore(&rnp->lock, flags);
808 } else { 808 } else {
809 rnp->exp_tasks = rnp->blkd_tasks.next; 809 rnp->exp_tasks = rnp->blkd_tasks.next;
810 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ 810 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
811 must_wait = 1; 811 must_wait = 1;
812 } 812 }
813 if (!must_wait) 813 if (!must_wait)
814 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ 814 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
815 } 815 }
816 816
817 /** 817 /**
818 * synchronize_rcu_expedited - Brute-force RCU grace period 818 * synchronize_rcu_expedited - Brute-force RCU grace period
819 * 819 *
820 * Wait for an RCU-preempt grace period, but expedite it. The basic 820 * Wait for an RCU-preempt grace period, but expedite it. The basic
821 * idea is to invoke synchronize_sched_expedited() to push all the tasks to 821 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
822 * the ->blkd_tasks lists and wait for this list to drain. This consumes 822 * the ->blkd_tasks lists and wait for this list to drain. This consumes
823 * significant time on all CPUs and is unfriendly to real-time workloads, 823 * significant time on all CPUs and is unfriendly to real-time workloads,
824 * so is thus not recommended for any sort of common-case code. 824 * so is thus not recommended for any sort of common-case code.
825 * In fact, if you are using synchronize_rcu_expedited() in a loop, 825 * In fact, if you are using synchronize_rcu_expedited() in a loop,
826 * please restructure your code to batch your updates, and then Use a 826 * please restructure your code to batch your updates, and then Use a
827 * single synchronize_rcu() instead. 827 * single synchronize_rcu() instead.
828 * 828 *
829 * Note that it is illegal to call this function while holding any lock 829 * Note that it is illegal to call this function while holding any lock
830 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal 830 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
831 * to call this function from a CPU-hotplug notifier. Failing to observe 831 * to call this function from a CPU-hotplug notifier. Failing to observe
832 * these restrictions will result in deadlock. 832 * these restrictions will result in deadlock.
833 */ 833 */
834 void synchronize_rcu_expedited(void) 834 void synchronize_rcu_expedited(void)
835 { 835 {
836 unsigned long flags; 836 unsigned long flags;
837 struct rcu_node *rnp; 837 struct rcu_node *rnp;
838 struct rcu_state *rsp = &rcu_preempt_state; 838 struct rcu_state *rsp = &rcu_preempt_state;
839 unsigned long snap; 839 unsigned long snap;
840 int trycount = 0; 840 int trycount = 0;
841 841
842 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 842 smp_mb(); /* Caller's modifications seen first by other CPUs. */
843 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; 843 snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
844 smp_mb(); /* Above access cannot bleed into critical section. */ 844 smp_mb(); /* Above access cannot bleed into critical section. */
845 845
846 /* 846 /*
847 * Block CPU-hotplug operations. This means that any CPU-hotplug 847 * Block CPU-hotplug operations. This means that any CPU-hotplug
848 * operation that finds an rcu_node structure with tasks in the 848 * operation that finds an rcu_node structure with tasks in the
849 * process of being boosted will know that all tasks blocking 849 * process of being boosted will know that all tasks blocking
850 * this expedited grace period will already be in the process of 850 * this expedited grace period will already be in the process of
851 * being boosted. This simplifies the process of moving tasks 851 * being boosted. This simplifies the process of moving tasks
852 * from leaf to root rcu_node structures. 852 * from leaf to root rcu_node structures.
853 */ 853 */
854 get_online_cpus(); 854 get_online_cpus();
855 855
856 /* 856 /*
857 * Acquire lock, falling back to synchronize_rcu() if too many 857 * Acquire lock, falling back to synchronize_rcu() if too many
858 * lock-acquisition failures. Of course, if someone does the 858 * lock-acquisition failures. Of course, if someone does the
859 * expedited grace period for us, just leave. 859 * expedited grace period for us, just leave.
860 */ 860 */
861 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { 861 while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
862 if (ULONG_CMP_LT(snap, 862 if (ULONG_CMP_LT(snap,
863 ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 863 ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
864 put_online_cpus(); 864 put_online_cpus();
865 goto mb_ret; /* Others did our work for us. */ 865 goto mb_ret; /* Others did our work for us. */
866 } 866 }
867 if (trycount++ < 10) { 867 if (trycount++ < 10) {
868 udelay(trycount * num_online_cpus()); 868 udelay(trycount * num_online_cpus());
869 } else { 869 } else {
870 put_online_cpus(); 870 put_online_cpus();
871 wait_rcu_gp(call_rcu); 871 wait_rcu_gp(call_rcu);
872 return; 872 return;
873 } 873 }
874 } 874 }
875 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) { 875 if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
876 put_online_cpus(); 876 put_online_cpus();
877 goto unlock_mb_ret; /* Others did our work for us. */ 877 goto unlock_mb_ret; /* Others did our work for us. */
878 } 878 }
879 879
880 /* force all RCU readers onto ->blkd_tasks lists. */ 880 /* force all RCU readers onto ->blkd_tasks lists. */
881 synchronize_sched_expedited(); 881 synchronize_sched_expedited();
882 882
883 /* Initialize ->expmask for all non-leaf rcu_node structures. */ 883 /* Initialize ->expmask for all non-leaf rcu_node structures. */
884 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { 884 rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
885 raw_spin_lock_irqsave(&rnp->lock, flags); 885 raw_spin_lock_irqsave(&rnp->lock, flags);
886 rnp->expmask = rnp->qsmaskinit; 886 rnp->expmask = rnp->qsmaskinit;
887 raw_spin_unlock_irqrestore(&rnp->lock, flags); 887 raw_spin_unlock_irqrestore(&rnp->lock, flags);
888 } 888 }
889 889
890 /* Snapshot current state of ->blkd_tasks lists. */ 890 /* Snapshot current state of ->blkd_tasks lists. */
891 rcu_for_each_leaf_node(rsp, rnp) 891 rcu_for_each_leaf_node(rsp, rnp)
892 sync_rcu_preempt_exp_init(rsp, rnp); 892 sync_rcu_preempt_exp_init(rsp, rnp);
893 if (NUM_RCU_NODES > 1) 893 if (NUM_RCU_NODES > 1)
894 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); 894 sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
895 895
896 put_online_cpus(); 896 put_online_cpus();
897 897
898 /* Wait for snapshotted ->blkd_tasks lists to drain. */ 898 /* Wait for snapshotted ->blkd_tasks lists to drain. */
899 rnp = rcu_get_root(rsp); 899 rnp = rcu_get_root(rsp);
900 wait_event(sync_rcu_preempt_exp_wq, 900 wait_event(sync_rcu_preempt_exp_wq,
901 sync_rcu_preempt_exp_done(rnp)); 901 sync_rcu_preempt_exp_done(rnp));
902 902
903 /* Clean up and exit. */ 903 /* Clean up and exit. */
904 smp_mb(); /* ensure expedited GP seen before counter increment. */ 904 smp_mb(); /* ensure expedited GP seen before counter increment. */
905 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 905 ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
906 unlock_mb_ret: 906 unlock_mb_ret:
907 mutex_unlock(&sync_rcu_preempt_exp_mutex); 907 mutex_unlock(&sync_rcu_preempt_exp_mutex);
908 mb_ret: 908 mb_ret:
909 smp_mb(); /* ensure subsequent action seen after grace period. */ 909 smp_mb(); /* ensure subsequent action seen after grace period. */
910 } 910 }
911 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 911 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
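
To illustrate the batching advice in the comment above, a hedged sketch (struct foo_item and foo_free_batch() are hypothetical): rather than one expedited grace period per removed item, unlink the whole batch and then wait once.

    #include <linux/list.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo_item {
            struct list_head list;
    };

    /* "unlinked" holds items already removed from the RCU-visible structure. */
    static void foo_free_batch(struct list_head *unlinked)
    {
            struct foo_item *p, *tmp;

            synchronize_rcu();      /* one grace period covers every unlinked item */
            list_for_each_entry_safe(p, tmp, unlinked, list)
                    kfree(p);
    }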
912 912
913 /** 913 /**
914 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. 914 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
915 * 915 *
916 * Note that this primitive does not necessarily wait for an RCU grace period 916 * Note that this primitive does not necessarily wait for an RCU grace period
917 * to complete. For example, if there are no RCU callbacks queued anywhere 917 * to complete. For example, if there are no RCU callbacks queued anywhere
918 * in the system, then rcu_barrier() is within its rights to return 918 * in the system, then rcu_barrier() is within its rights to return
919 * immediately, without waiting for anything, much less an RCU grace period. 919 * immediately, without waiting for anything, much less an RCU grace period.
920 */ 920 */
921 void rcu_barrier(void) 921 void rcu_barrier(void)
922 { 922 {
923 _rcu_barrier(&rcu_preempt_state); 923 _rcu_barrier(&rcu_preempt_state);
924 } 924 }
925 EXPORT_SYMBOL_GPL(rcu_barrier); 925 EXPORT_SYMBOL_GPL(rcu_barrier);
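
A typical use is in module-unload paths, to guarantee that no callback queued by the module can run after its code has been freed; a minimal sketch (foo_exit() is a hypothetical module-exit function):

    #include <linux/module.h>

    static void __exit foo_exit(void)
    {
            /*
             * Module-specific teardown that stops new call_rcu() invocations
             * goes here; then wait for every already-queued callback to finish.
             */
            rcu_barrier();
    }
    module_exit(foo_exit);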
926 926
927 /* 927 /*
928 * Initialize preemptible RCU's state structures. 928 * Initialize preemptible RCU's state structures.
929 */ 929 */
930 static void __init __rcu_init_preempt(void) 930 static void __init __rcu_init_preempt(void)
931 { 931 {
932 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); 932 rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
933 } 933 }
934 934
935 /*
936 * Check for a task exiting while in a preemptible-RCU read-side
937 * critical section, clean up if so. No need to issue warnings,
938 * as debug_check_no_locks_held() already does this if lockdep
939 * is enabled.
940 */
941 void exit_rcu(void)
942 {
943 struct task_struct *t = current;
944
945 if (likely(list_empty(&current->rcu_node_entry)))
946 return;
947 t->rcu_read_lock_nesting = 1;
948 barrier();
949 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
950 __rcu_read_unlock();
951 }
952
935 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 953 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
936 954
937 static struct rcu_state *rcu_state = &rcu_sched_state; 955 static struct rcu_state *rcu_state = &rcu_sched_state;
938 956
939 /* 957 /*
940 * Tell them what RCU they are running. 958 * Tell them what RCU they are running.
941 */ 959 */
942 static void __init rcu_bootup_announce(void) 960 static void __init rcu_bootup_announce(void)
943 { 961 {
944 pr_info("Hierarchical RCU implementation.\n"); 962 pr_info("Hierarchical RCU implementation.\n");
945 rcu_bootup_announce_oddness(); 963 rcu_bootup_announce_oddness();
946 } 964 }
947 965
948 /* 966 /*
949 * Return the number of RCU batches processed thus far for debug & stats. 967 * Return the number of RCU batches processed thus far for debug & stats.
950 */ 968 */
951 long rcu_batches_completed(void) 969 long rcu_batches_completed(void)
952 { 970 {
953 return rcu_batches_completed_sched(); 971 return rcu_batches_completed_sched();
954 } 972 }
955 EXPORT_SYMBOL_GPL(rcu_batches_completed); 973 EXPORT_SYMBOL_GPL(rcu_batches_completed);
956 974
957 /* 975 /*
958 * Force a quiescent state for RCU, which, because there is no preemptible 976 * Force a quiescent state for RCU, which, because there is no preemptible
959 * RCU, becomes the same as rcu-sched. 977 * RCU, becomes the same as rcu-sched.
960 */ 978 */
961 void rcu_force_quiescent_state(void) 979 void rcu_force_quiescent_state(void)
962 { 980 {
963 rcu_sched_force_quiescent_state(); 981 rcu_sched_force_quiescent_state();
964 } 982 }
965 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 983 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
966 984
967 /* 985 /*
968 * Because preemptible RCU does not exist, we never have to check for 986 * Because preemptible RCU does not exist, we never have to check for
969 * CPUs being in quiescent states. 987 * CPUs being in quiescent states.
970 */ 988 */
971 static void rcu_preempt_note_context_switch(int cpu) 989 static void rcu_preempt_note_context_switch(int cpu)
972 { 990 {
973 } 991 }
974 992
975 /* 993 /*
976 * Because preemptible RCU does not exist, there are never any preempted 994 * Because preemptible RCU does not exist, there are never any preempted
977 * RCU readers. 995 * RCU readers.
978 */ 996 */
979 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) 997 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
980 { 998 {
981 return 0; 999 return 0;
982 } 1000 }
983 1001
984 #ifdef CONFIG_HOTPLUG_CPU 1002 #ifdef CONFIG_HOTPLUG_CPU
985 1003
986 /* Because preemptible RCU does not exist, no quieting of tasks. */ 1004 /* Because preemptible RCU does not exist, no quieting of tasks. */
987 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) 1005 static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
988 { 1006 {
989 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1007 raw_spin_unlock_irqrestore(&rnp->lock, flags);
990 } 1008 }
991 1009
992 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1010 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
993 1011
994 /* 1012 /*
995 * Because preemptible RCU does not exist, we never have to check for 1013 * Because preemptible RCU does not exist, we never have to check for
996 * tasks blocked within RCU read-side critical sections. 1014 * tasks blocked within RCU read-side critical sections.
997 */ 1015 */
998 static void rcu_print_detail_task_stall(struct rcu_state *rsp) 1016 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
999 { 1017 {
1000 } 1018 }
1001 1019
1002 /* 1020 /*
1003 * Because preemptible RCU does not exist, we never have to check for 1021 * Because preemptible RCU does not exist, we never have to check for
1004 * tasks blocked within RCU read-side critical sections. 1022 * tasks blocked within RCU read-side critical sections.
1005 */ 1023 */
1006 static int rcu_print_task_stall(struct rcu_node *rnp) 1024 static int rcu_print_task_stall(struct rcu_node *rnp)
1007 { 1025 {
1008 return 0; 1026 return 0;
1009 } 1027 }
1010 1028
1011 /* 1029 /*
1012 * Because there is no preemptible RCU, there can be no readers blocked, 1030 * Because there is no preemptible RCU, there can be no readers blocked,
1013 * so there is no need to check for blocked tasks. So check only for 1031 * so there is no need to check for blocked tasks. So check only for
1014 * bogus qsmask values. 1032 * bogus qsmask values.
1015 */ 1033 */
1016 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 1034 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
1017 { 1035 {
1018 WARN_ON_ONCE(rnp->qsmask); 1036 WARN_ON_ONCE(rnp->qsmask);
1019 } 1037 }
1020 1038
1021 #ifdef CONFIG_HOTPLUG_CPU 1039 #ifdef CONFIG_HOTPLUG_CPU
1022 1040
1023 /* 1041 /*
1024 * Because preemptible RCU does not exist, it never needs to migrate 1042 * Because preemptible RCU does not exist, it never needs to migrate
1025 * tasks that were blocked within RCU read-side critical sections, and 1043 * tasks that were blocked within RCU read-side critical sections, and
1026 * such non-existent tasks cannot possibly have been blocking the current 1044 * such non-existent tasks cannot possibly have been blocking the current
1027 * grace period. 1045 * grace period.
1028 */ 1046 */
1029 static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 1047 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1030 struct rcu_node *rnp, 1048 struct rcu_node *rnp,
1031 struct rcu_data *rdp) 1049 struct rcu_data *rdp)
1032 { 1050 {
1033 return 0; 1051 return 0;
1034 } 1052 }
1035 1053
1036 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1054 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1037 1055
1038 /* 1056 /*
1039 * Because preemptible RCU does not exist, it never has any callbacks 1057 * Because preemptible RCU does not exist, it never has any callbacks
1040 * to check. 1058 * to check.
1041 */ 1059 */
1042 static void rcu_preempt_check_callbacks(int cpu) 1060 static void rcu_preempt_check_callbacks(int cpu)
1043 { 1061 {
1044 } 1062 }
1045 1063
1046 /* 1064 /*
1047 * Queue an RCU callback for lazy invocation after a grace period. 1065 * Queue an RCU callback for lazy invocation after a grace period.
1048 * This will likely be later named something like "call_rcu_lazy()", 1066 * This will likely be later named something like "call_rcu_lazy()",
1049 * but this change will require some way of tagging the lazy RCU 1067 * but this change will require some way of tagging the lazy RCU
1050 * callbacks in the list of pending callbacks. Until then, this 1068 * callbacks in the list of pending callbacks. Until then, this
1051 * function may only be called from __kfree_rcu(). 1069 * function may only be called from __kfree_rcu().
1052 * 1070 *
1053 * Because there is no preemptible RCU, we use RCU-sched instead. 1071 * Because there is no preemptible RCU, we use RCU-sched instead.
1054 */ 1072 */
1055 void kfree_call_rcu(struct rcu_head *head, 1073 void kfree_call_rcu(struct rcu_head *head,
1056 void (*func)(struct rcu_head *rcu)) 1074 void (*func)(struct rcu_head *rcu))
1057 { 1075 {
1058 __call_rcu(head, func, &rcu_sched_state, -1, 1); 1076 __call_rcu(head, func, &rcu_sched_state, -1, 1);
1059 } 1077 }
1060 EXPORT_SYMBOL_GPL(kfree_call_rcu); 1078 EXPORT_SYMBOL_GPL(kfree_call_rcu);
1061 1079
1062 /* 1080 /*
1063 * Wait for an rcu-preempt grace period, but make it happen quickly. 1081 * Wait for an rcu-preempt grace period, but make it happen quickly.
1064 * But because preemptible RCU does not exist, map to rcu-sched. 1082 * But because preemptible RCU does not exist, map to rcu-sched.
1065 */ 1083 */
1066 void synchronize_rcu_expedited(void) 1084 void synchronize_rcu_expedited(void)
1067 { 1085 {
1068 synchronize_sched_expedited(); 1086 synchronize_sched_expedited();
1069 } 1087 }
1070 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 1088 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
1071 1089
1072 #ifdef CONFIG_HOTPLUG_CPU 1090 #ifdef CONFIG_HOTPLUG_CPU
1073 1091
1074 /* 1092 /*
1075 * Because preemptible RCU does not exist, there is never any need to 1093 * Because preemptible RCU does not exist, there is never any need to
1076 * report on tasks preempted in RCU read-side critical sections during 1094 * report on tasks preempted in RCU read-side critical sections during
1077 * expedited RCU grace periods. 1095 * expedited RCU grace periods.
1078 */ 1096 */
1079 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 1097 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
1080 bool wake) 1098 bool wake)
1081 { 1099 {
1082 } 1100 }
1083 1101
1084 #endif /* #ifdef CONFIG_HOTPLUG_CPU */ 1102 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
1085 1103
1086 /* 1104 /*
1087 * Because preemptible RCU does not exist, rcu_barrier() is just 1105 * Because preemptible RCU does not exist, rcu_barrier() is just
1088 * another name for rcu_barrier_sched(). 1106 * another name for rcu_barrier_sched().
1089 */ 1107 */
1090 void rcu_barrier(void) 1108 void rcu_barrier(void)
1091 { 1109 {
1092 rcu_barrier_sched(); 1110 rcu_barrier_sched();
1093 } 1111 }
1094 EXPORT_SYMBOL_GPL(rcu_barrier); 1112 EXPORT_SYMBOL_GPL(rcu_barrier);
1095 1113
1096 /* 1114 /*
1097 * Because preemptible RCU does not exist, it need not be initialized. 1115 * Because preemptible RCU does not exist, it need not be initialized.
1098 */ 1116 */
1099 static void __init __rcu_init_preempt(void) 1117 static void __init __rcu_init_preempt(void)
1118 {
1119 }
1120
1121 /*
1122 * Because preemptible RCU does not exist, tasks cannot possibly exit
1123 * while in preemptible RCU read-side critical sections.
1124 */
1125 void exit_rcu(void)
1100 { 1126 {
1101 } 1127 }
1102 1128
1103 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 1129 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1104 1130
1105 #ifdef CONFIG_RCU_BOOST 1131 #ifdef CONFIG_RCU_BOOST
1106 1132
1107 #include "rtmutex_common.h" 1133 #include "rtmutex_common.h"
1108 1134
1109 #ifdef CONFIG_RCU_TRACE 1135 #ifdef CONFIG_RCU_TRACE
1110 1136
1111 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1137 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1112 { 1138 {
1113 if (list_empty(&rnp->blkd_tasks)) 1139 if (list_empty(&rnp->blkd_tasks))
1114 rnp->n_balk_blkd_tasks++; 1140 rnp->n_balk_blkd_tasks++;
1115 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) 1141 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
1116 rnp->n_balk_exp_gp_tasks++; 1142 rnp->n_balk_exp_gp_tasks++;
1117 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) 1143 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
1118 rnp->n_balk_boost_tasks++; 1144 rnp->n_balk_boost_tasks++;
1119 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) 1145 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
1120 rnp->n_balk_notblocked++; 1146 rnp->n_balk_notblocked++;
1121 else if (rnp->gp_tasks != NULL && 1147 else if (rnp->gp_tasks != NULL &&
1122 ULONG_CMP_LT(jiffies, rnp->boost_time)) 1148 ULONG_CMP_LT(jiffies, rnp->boost_time))
1123 rnp->n_balk_notyet++; 1149 rnp->n_balk_notyet++;
1124 else 1150 else
1125 rnp->n_balk_nos++; 1151 rnp->n_balk_nos++;
1126 } 1152 }
1127 1153
1128 #else /* #ifdef CONFIG_RCU_TRACE */ 1154 #else /* #ifdef CONFIG_RCU_TRACE */
1129 1155
1130 static void rcu_initiate_boost_trace(struct rcu_node *rnp) 1156 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1131 { 1157 {
1132 } 1158 }
1133 1159
1134 #endif /* #else #ifdef CONFIG_RCU_TRACE */ 1160 #endif /* #else #ifdef CONFIG_RCU_TRACE */
1135 1161
1136 static void rcu_wake_cond(struct task_struct *t, int status) 1162 static void rcu_wake_cond(struct task_struct *t, int status)
1137 { 1163 {
1138 /* 1164 /*
1139 * If the thread is yielding, only wake it when this 1165 * If the thread is yielding, only wake it when this
1140 * is invoked from idle 1166 * is invoked from idle
1141 */ 1167 */
1142 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) 1168 if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
1143 wake_up_process(t); 1169 wake_up_process(t);
1144 } 1170 }
1145 1171
1146 /* 1172 /*
1147 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1173 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1148 * or ->boost_tasks, advancing the pointer to the next task in the 1174 * or ->boost_tasks, advancing the pointer to the next task in the
1149 * ->blkd_tasks list. 1175 * ->blkd_tasks list.
1150 * 1176 *
1151 * Note that irqs must be enabled: boosting the task can block. 1177 * Note that irqs must be enabled: boosting the task can block.
1152 * Returns 1 if there are more tasks needing to be boosted. 1178 * Returns 1 if there are more tasks needing to be boosted.
1153 */ 1179 */
1154 static int rcu_boost(struct rcu_node *rnp) 1180 static int rcu_boost(struct rcu_node *rnp)
1155 { 1181 {
1156 unsigned long flags; 1182 unsigned long flags;
1157 struct rt_mutex mtx; 1183 struct rt_mutex mtx;
1158 struct task_struct *t; 1184 struct task_struct *t;
1159 struct list_head *tb; 1185 struct list_head *tb;
1160 1186
1161 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) 1187 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
1162 return 0; /* Nothing left to boost. */ 1188 return 0; /* Nothing left to boost. */
1163 1189
1164 raw_spin_lock_irqsave(&rnp->lock, flags); 1190 raw_spin_lock_irqsave(&rnp->lock, flags);
1165 1191
1166 /* 1192 /*
1167 * Recheck under the lock: all tasks in need of boosting 1193 * Recheck under the lock: all tasks in need of boosting
1168 * might exit their RCU read-side critical sections on their own. 1194 * might exit their RCU read-side critical sections on their own.
1169 */ 1195 */
1170 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { 1196 if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
1171 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1197 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1172 return 0; 1198 return 0;
1173 } 1199 }
1174 1200
1175 /* 1201 /*
1176 * Preferentially boost tasks blocking expedited grace periods. 1202 * Preferentially boost tasks blocking expedited grace periods.
1177 * This cannot starve the normal grace periods because a second 1203 * This cannot starve the normal grace periods because a second
1178 * expedited grace period must boost all blocked tasks, including 1204 * expedited grace period must boost all blocked tasks, including
1179 * those blocking the pre-existing normal grace period. 1205 * those blocking the pre-existing normal grace period.
1180 */ 1206 */
1181 if (rnp->exp_tasks != NULL) { 1207 if (rnp->exp_tasks != NULL) {
1182 tb = rnp->exp_tasks; 1208 tb = rnp->exp_tasks;
1183 rnp->n_exp_boosts++; 1209 rnp->n_exp_boosts++;
1184 } else { 1210 } else {
1185 tb = rnp->boost_tasks; 1211 tb = rnp->boost_tasks;
1186 rnp->n_normal_boosts++; 1212 rnp->n_normal_boosts++;
1187 } 1213 }
1188 rnp->n_tasks_boosted++; 1214 rnp->n_tasks_boosted++;
1189 1215
1190 /* 1216 /*
1191 * We boost task t by manufacturing an rt_mutex that appears to 1217 * We boost task t by manufacturing an rt_mutex that appears to
1192 * be held by task t. We leave a pointer to that rt_mutex where 1218 * be held by task t. We leave a pointer to that rt_mutex where
1193 * task t can find it, and task t will release the mutex when it 1219 * task t can find it, and task t will release the mutex when it
1194 * exits its outermost RCU read-side critical section. Then 1220 * exits its outermost RCU read-side critical section. Then
1195 * simply acquiring this artificial rt_mutex will boost task 1221 * simply acquiring this artificial rt_mutex will boost task
1196 * t's priority. (Thanks to tglx for suggesting this approach!) 1222 * t's priority. (Thanks to tglx for suggesting this approach!)
1197 * 1223 *
1198 * Note that task t must acquire rnp->lock to remove itself from 1224 * Note that task t must acquire rnp->lock to remove itself from
1199 * the ->blkd_tasks list, which it will do from exit() if from 1225 * the ->blkd_tasks list, which it will do from exit() if from
1200 * nowhere else. We therefore are guaranteed that task t will 1226 * nowhere else. We therefore are guaranteed that task t will
1201 * stay around at least until we drop rnp->lock. Note that 1227 * stay around at least until we drop rnp->lock. Note that
1202 * rnp->lock also resolves races between our priority boosting 1228 * rnp->lock also resolves races between our priority boosting
1203 * and task t's exiting its outermost RCU read-side critical 1229 * and task t's exiting its outermost RCU read-side critical
1204 * section. 1230 * section.
1205 */ 1231 */
1206 t = container_of(tb, struct task_struct, rcu_node_entry); 1232 t = container_of(tb, struct task_struct, rcu_node_entry);
1207 rt_mutex_init_proxy_locked(&mtx, t); 1233 rt_mutex_init_proxy_locked(&mtx, t);
1208 t->rcu_boost_mutex = &mtx; 1234 t->rcu_boost_mutex = &mtx;
1209 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1235 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1210 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1236 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1211 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1237 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
1212 1238
1213 return ACCESS_ONCE(rnp->exp_tasks) != NULL || 1239 return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
1214 ACCESS_ONCE(rnp->boost_tasks) != NULL; 1240 ACCESS_ONCE(rnp->boost_tasks) != NULL;
1215 } 1241 }
1216 1242
1217 /* 1243 /*
1218 * Priority-boosting kthread. One per leaf rcu_node and one for the 1244 * Priority-boosting kthread. One per leaf rcu_node and one for the
1219 * root rcu_node. 1245 * root rcu_node.
1220 */ 1246 */
1221 static int rcu_boost_kthread(void *arg) 1247 static int rcu_boost_kthread(void *arg)
1222 { 1248 {
1223 struct rcu_node *rnp = (struct rcu_node *)arg; 1249 struct rcu_node *rnp = (struct rcu_node *)arg;
1224 int spincnt = 0; 1250 int spincnt = 0;
1225 int more2boost; 1251 int more2boost;
1226 1252
1227 trace_rcu_utilization("Start boost kthread@init"); 1253 trace_rcu_utilization("Start boost kthread@init");
1228 for (;;) { 1254 for (;;) {
1229 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1255 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1230 trace_rcu_utilization("End boost kthread@rcu_wait"); 1256 trace_rcu_utilization("End boost kthread@rcu_wait");
1231 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1257 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1232 trace_rcu_utilization("Start boost kthread@rcu_wait"); 1258 trace_rcu_utilization("Start boost kthread@rcu_wait");
1233 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1259 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1234 more2boost = rcu_boost(rnp); 1260 more2boost = rcu_boost(rnp);
1235 if (more2boost) 1261 if (more2boost)
1236 spincnt++; 1262 spincnt++;
1237 else 1263 else
1238 spincnt = 0; 1264 spincnt = 0;
1239 if (spincnt > 10) { 1265 if (spincnt > 10) {
1240 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING; 1266 rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
1241 trace_rcu_utilization("End boost kthread@rcu_yield"); 1267 trace_rcu_utilization("End boost kthread@rcu_yield");
1242 schedule_timeout_interruptible(2); 1268 schedule_timeout_interruptible(2);
1243 trace_rcu_utilization("Start boost kthread@rcu_yield"); 1269 trace_rcu_utilization("Start boost kthread@rcu_yield");
1244 spincnt = 0; 1270 spincnt = 0;
1245 } 1271 }
1246 } 1272 }
1247 /* NOTREACHED */ 1273 /* NOTREACHED */
1248 trace_rcu_utilization("End boost kthread@notreached"); 1274 trace_rcu_utilization("End boost kthread@notreached");
1249 return 0; 1275 return 0;
1250 } 1276 }
1251 1277
1252 /* 1278 /*
1253 * Check to see if it is time to start boosting RCU readers that are 1279 * Check to see if it is time to start boosting RCU readers that are
1254 * blocking the current grace period, and, if so, tell the per-rcu_node 1280 * blocking the current grace period, and, if so, tell the per-rcu_node
1255 * kthread to start boosting them. If there is an expedited grace 1281 * kthread to start boosting them. If there is an expedited grace
1256 * period in progress, it is always time to boost. 1282 * period in progress, it is always time to boost.
1257 * 1283 *
1258 * The caller must hold rnp->lock, which this function releases. 1284 * The caller must hold rnp->lock, which this function releases.
1259 * The ->boost_kthread_task is immortal, so we don't need to worry 1285 * The ->boost_kthread_task is immortal, so we don't need to worry
1260 * about it going away. 1286 * about it going away.
1261 */ 1287 */
1262 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1288 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1263 { 1289 {
1264 struct task_struct *t; 1290 struct task_struct *t;
1265 1291
1266 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1292 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
1267 rnp->n_balk_exp_gp_tasks++; 1293 rnp->n_balk_exp_gp_tasks++;
1268 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1294 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1269 return; 1295 return;
1270 } 1296 }
1271 if (rnp->exp_tasks != NULL || 1297 if (rnp->exp_tasks != NULL ||
1272 (rnp->gp_tasks != NULL && 1298 (rnp->gp_tasks != NULL &&
1273 rnp->boost_tasks == NULL && 1299 rnp->boost_tasks == NULL &&
1274 rnp->qsmask == 0 && 1300 rnp->qsmask == 0 &&
1275 ULONG_CMP_GE(jiffies, rnp->boost_time))) { 1301 ULONG_CMP_GE(jiffies, rnp->boost_time))) {
1276 if (rnp->exp_tasks == NULL) 1302 if (rnp->exp_tasks == NULL)
1277 rnp->boost_tasks = rnp->gp_tasks; 1303 rnp->boost_tasks = rnp->gp_tasks;
1278 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1304 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1279 t = rnp->boost_kthread_task; 1305 t = rnp->boost_kthread_task;
1280 if (t) 1306 if (t)
1281 rcu_wake_cond(t, rnp->boost_kthread_status); 1307 rcu_wake_cond(t, rnp->boost_kthread_status);
1282 } else { 1308 } else {
1283 rcu_initiate_boost_trace(rnp); 1309 rcu_initiate_boost_trace(rnp);
1284 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1310 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1285 } 1311 }
1286 } 1312 }
1287 1313
1288 /* 1314 /*
1289 * Wake up the per-CPU kthread to invoke RCU callbacks. 1315 * Wake up the per-CPU kthread to invoke RCU callbacks.
1290 */ 1316 */
1291 static void invoke_rcu_callbacks_kthread(void) 1317 static void invoke_rcu_callbacks_kthread(void)
1292 { 1318 {
1293 unsigned long flags; 1319 unsigned long flags;
1294 1320
1295 local_irq_save(flags); 1321 local_irq_save(flags);
1296 __this_cpu_write(rcu_cpu_has_work, 1); 1322 __this_cpu_write(rcu_cpu_has_work, 1);
1297 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && 1323 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1298 current != __this_cpu_read(rcu_cpu_kthread_task)) { 1324 current != __this_cpu_read(rcu_cpu_kthread_task)) {
1299 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), 1325 rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
1300 __this_cpu_read(rcu_cpu_kthread_status)); 1326 __this_cpu_read(rcu_cpu_kthread_status));
1301 } 1327 }
1302 local_irq_restore(flags); 1328 local_irq_restore(flags);
1303 } 1329 }
1304 1330
1305 /* 1331 /*
1306 * Is the current CPU running the RCU-callbacks kthread? 1332 * Is the current CPU running the RCU-callbacks kthread?
1307 * Caller must have preemption disabled. 1333 * Caller must have preemption disabled.
1308 */ 1334 */
1309 static bool rcu_is_callbacks_kthread(void) 1335 static bool rcu_is_callbacks_kthread(void)
1310 { 1336 {
1311 return __get_cpu_var(rcu_cpu_kthread_task) == current; 1337 return __get_cpu_var(rcu_cpu_kthread_task) == current;
1312 } 1338 }
1313 1339
1314 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) 1340 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
1315 1341
1316 /* 1342 /*
1317 * Do priority-boost accounting for the start of a new grace period. 1343 * Do priority-boost accounting for the start of a new grace period.
1318 */ 1344 */
1319 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1345 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1320 { 1346 {
1321 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; 1347 rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
1322 } 1348 }
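
As a worked example of the RCU_BOOST_DELAY_JIFFIES computation above (values chosen only for illustration): with CONFIG_RCU_BOOST_DELAY=500 milliseconds and HZ=250, DIV_ROUND_UP(500 * 250, 1000) = 125, so ->boost_time lands 125 jiffies (500 ms) after the grace period starts, and rcu_initiate_boost() will not begin boosting tasks blocking a normal grace period until that time has passed.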
1323 1349
1324 /* 1350 /*
1325 * Create an RCU-boost kthread for the specified node if one does not 1351 * Create an RCU-boost kthread for the specified node if one does not
1326 * already exist. We only create this kthread for preemptible RCU. 1352 * already exist. We only create this kthread for preemptible RCU.
1327 * Returns zero if all is well, a negated errno otherwise. 1353 * Returns zero if all is well, a negated errno otherwise.
1328 */ 1354 */
1329 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1355 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1330 struct rcu_node *rnp) 1356 struct rcu_node *rnp)
1331 { 1357 {
1332 int rnp_index = rnp - &rsp->node[0]; 1358 int rnp_index = rnp - &rsp->node[0];
1333 unsigned long flags; 1359 unsigned long flags;
1334 struct sched_param sp; 1360 struct sched_param sp;
1335 struct task_struct *t; 1361 struct task_struct *t;
1336 1362
1337 if (&rcu_preempt_state != rsp) 1363 if (&rcu_preempt_state != rsp)
1338 return 0; 1364 return 0;
1339 1365
1340 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0) 1366 if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
1341 return 0; 1367 return 0;
1342 1368
1343 rsp->boost = 1; 1369 rsp->boost = 1;
1344 if (rnp->boost_kthread_task != NULL) 1370 if (rnp->boost_kthread_task != NULL)
1345 return 0; 1371 return 0;
1346 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1372 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1347 "rcub/%d", rnp_index); 1373 "rcub/%d", rnp_index);
1348 if (IS_ERR(t)) 1374 if (IS_ERR(t))
1349 return PTR_ERR(t); 1375 return PTR_ERR(t);
1350 raw_spin_lock_irqsave(&rnp->lock, flags); 1376 raw_spin_lock_irqsave(&rnp->lock, flags);
1351 rnp->boost_kthread_task = t; 1377 rnp->boost_kthread_task = t;
1352 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1378 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1353 sp.sched_priority = RCU_BOOST_PRIO; 1379 sp.sched_priority = RCU_BOOST_PRIO;
1354 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1380 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1355 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1381 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1356 return 0; 1382 return 0;
1357 } 1383 }
1358 1384
1359 static void rcu_kthread_do_work(void) 1385 static void rcu_kthread_do_work(void)
1360 { 1386 {
1361 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); 1387 rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
1362 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1388 rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1363 rcu_preempt_do_callbacks(); 1389 rcu_preempt_do_callbacks();
1364 } 1390 }
1365 1391
1366 static void rcu_cpu_kthread_setup(unsigned int cpu) 1392 static void rcu_cpu_kthread_setup(unsigned int cpu)
1367 { 1393 {
1368 struct sched_param sp; 1394 struct sched_param sp;
1369 1395
1370 sp.sched_priority = RCU_KTHREAD_PRIO; 1396 sp.sched_priority = RCU_KTHREAD_PRIO;
1371 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1397 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1372 } 1398 }
1373 1399
1374 static void rcu_cpu_kthread_park(unsigned int cpu) 1400 static void rcu_cpu_kthread_park(unsigned int cpu)
1375 { 1401 {
1376 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; 1402 per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
1377 } 1403 }
1378 1404
1379 static int rcu_cpu_kthread_should_run(unsigned int cpu) 1405 static int rcu_cpu_kthread_should_run(unsigned int cpu)
1380 { 1406 {
1381 return __get_cpu_var(rcu_cpu_has_work); 1407 return __get_cpu_var(rcu_cpu_has_work);
1382 } 1408 }
1383 1409
1384 /* 1410 /*
1385 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1411 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1386 * RCU softirq used in flavors and configurations of RCU that do not 1412 * RCU softirq used in flavors and configurations of RCU that do not
1387 * support RCU priority boosting. 1413 * support RCU priority boosting.
1388 */ 1414 */
1389 static void rcu_cpu_kthread(unsigned int cpu) 1415 static void rcu_cpu_kthread(unsigned int cpu)
1390 { 1416 {
1391 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); 1417 unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
1392 char work, *workp = &__get_cpu_var(rcu_cpu_has_work); 1418 char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
1393 int spincnt; 1419 int spincnt;
1394 1420
1395 for (spincnt = 0; spincnt < 10; spincnt++) { 1421 for (spincnt = 0; spincnt < 10; spincnt++) {
1396 trace_rcu_utilization("Start CPU kthread@rcu_wait"); 1422 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1397 local_bh_disable(); 1423 local_bh_disable();
1398 *statusp = RCU_KTHREAD_RUNNING; 1424 *statusp = RCU_KTHREAD_RUNNING;
1399 this_cpu_inc(rcu_cpu_kthread_loops); 1425 this_cpu_inc(rcu_cpu_kthread_loops);
1400 local_irq_disable(); 1426 local_irq_disable();
1401 work = *workp; 1427 work = *workp;
1402 *workp = 0; 1428 *workp = 0;
1403 local_irq_enable(); 1429 local_irq_enable();
1404 if (work) 1430 if (work)
1405 rcu_kthread_do_work(); 1431 rcu_kthread_do_work();
1406 local_bh_enable(); 1432 local_bh_enable();
1407 if (*workp == 0) { 1433 if (*workp == 0) {
1408 trace_rcu_utilization("End CPU kthread@rcu_wait"); 1434 trace_rcu_utilization("End CPU kthread@rcu_wait");
1409 *statusp = RCU_KTHREAD_WAITING; 1435 *statusp = RCU_KTHREAD_WAITING;
1410 return; 1436 return;
1411 } 1437 }
1412 } 1438 }
1413 *statusp = RCU_KTHREAD_YIELDING; 1439 *statusp = RCU_KTHREAD_YIELDING;
1414 trace_rcu_utilization("Start CPU kthread@rcu_yield"); 1440 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1415 schedule_timeout_interruptible(2); 1441 schedule_timeout_interruptible(2);
1416 trace_rcu_utilization("End CPU kthread@rcu_yield"); 1442 trace_rcu_utilization("End CPU kthread@rcu_yield");
1417 *statusp = RCU_KTHREAD_WAITING; 1443 *statusp = RCU_KTHREAD_WAITING;
1418 } 1444 }
1419 1445
1420 /* 1446 /*
1421 * Set the per-rcu_node kthread's affinity to cover all CPUs that are 1447 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
1422 * served by the rcu_node in question. The CPU hotplug lock is still 1448 * served by the rcu_node in question. The CPU hotplug lock is still
1423 * held, so the value of rnp->qsmaskinit will be stable. 1449 * held, so the value of rnp->qsmaskinit will be stable.
1424 * 1450 *
1425 * We don't include outgoingcpu in the affinity set; use -1 if there is 1451 * We don't include outgoingcpu in the affinity set; use -1 if there is
1426 * no outgoing CPU. If there are no CPUs left in the affinity set, 1452 * no outgoing CPU. If there are no CPUs left in the affinity set,
1427 * this function allows the kthread to execute on any CPU. 1453 * this function allows the kthread to execute on any CPU.
1428 */ 1454 */
1429 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1455 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1430 { 1456 {
1431 struct task_struct *t = rnp->boost_kthread_task; 1457 struct task_struct *t = rnp->boost_kthread_task;
1432 unsigned long mask = rnp->qsmaskinit; 1458 unsigned long mask = rnp->qsmaskinit;
1433 cpumask_var_t cm; 1459 cpumask_var_t cm;
1434 int cpu; 1460 int cpu;
1435 1461
1436 if (!t) 1462 if (!t)
1437 return; 1463 return;
1438 if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) 1464 if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
1439 return; 1465 return;
1440 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) 1466 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
1441 if ((mask & 0x1) && cpu != outgoingcpu) 1467 if ((mask & 0x1) && cpu != outgoingcpu)
1442 cpumask_set_cpu(cpu, cm); 1468 cpumask_set_cpu(cpu, cm);
1443 if (cpumask_weight(cm) == 0) { 1469 if (cpumask_weight(cm) == 0) {
1444 cpumask_setall(cm); 1470 cpumask_setall(cm);
1445 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) 1471 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
1446 cpumask_clear_cpu(cpu, cm); 1472 cpumask_clear_cpu(cpu, cm);
1447 WARN_ON_ONCE(cpumask_weight(cm) == 0); 1473 WARN_ON_ONCE(cpumask_weight(cm) == 0);
1448 } 1474 }
1449 set_cpus_allowed_ptr(t, cm); 1475 set_cpus_allowed_ptr(t, cm);
1450 free_cpumask_var(cm); 1476 free_cpumask_var(cm);
1451 } 1477 }
1452 1478
1453 static struct smp_hotplug_thread rcu_cpu_thread_spec = { 1479 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1454 .store = &rcu_cpu_kthread_task, 1480 .store = &rcu_cpu_kthread_task,
1455 .thread_should_run = rcu_cpu_kthread_should_run, 1481 .thread_should_run = rcu_cpu_kthread_should_run,
1456 .thread_fn = rcu_cpu_kthread, 1482 .thread_fn = rcu_cpu_kthread,
1457 .thread_comm = "rcuc/%u", 1483 .thread_comm = "rcuc/%u",
1458 .setup = rcu_cpu_kthread_setup, 1484 .setup = rcu_cpu_kthread_setup,
1459 .park = rcu_cpu_kthread_park, 1485 .park = rcu_cpu_kthread_park,
1460 }; 1486 };
1461 1487
1462 /* 1488 /*
1463 * Spawn all kthreads -- called as soon as the scheduler is running. 1489 * Spawn all kthreads -- called as soon as the scheduler is running.
1464 */ 1490 */
1465 static int __init rcu_spawn_kthreads(void) 1491 static int __init rcu_spawn_kthreads(void)
1466 { 1492 {
1467 struct rcu_node *rnp; 1493 struct rcu_node *rnp;
1468 int cpu; 1494 int cpu;
1469 1495
1470 rcu_scheduler_fully_active = 1; 1496 rcu_scheduler_fully_active = 1;
1471 for_each_possible_cpu(cpu) 1497 for_each_possible_cpu(cpu)
1472 per_cpu(rcu_cpu_has_work, cpu) = 0; 1498 per_cpu(rcu_cpu_has_work, cpu) = 0;
1473 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1499 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1474 rnp = rcu_get_root(rcu_state); 1500 rnp = rcu_get_root(rcu_state);
1475 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1501 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1476 if (NUM_RCU_NODES > 1) { 1502 if (NUM_RCU_NODES > 1) {
1477 rcu_for_each_leaf_node(rcu_state, rnp) 1503 rcu_for_each_leaf_node(rcu_state, rnp)
1478 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1504 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1479 } 1505 }
1480 return 0; 1506 return 0;
1481 } 1507 }
1482 early_initcall(rcu_spawn_kthreads); 1508 early_initcall(rcu_spawn_kthreads);
1483 1509
1484 static void __cpuinit rcu_prepare_kthreads(int cpu) 1510 static void __cpuinit rcu_prepare_kthreads(int cpu)
1485 { 1511 {
1486 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1512 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1487 struct rcu_node *rnp = rdp->mynode; 1513 struct rcu_node *rnp = rdp->mynode;
1488 1514
1489 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1515 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1490 if (rcu_scheduler_fully_active) 1516 if (rcu_scheduler_fully_active)
1491 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp); 1517 (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
1492 } 1518 }
1493 1519
1494 #else /* #ifdef CONFIG_RCU_BOOST */ 1520 #else /* #ifdef CONFIG_RCU_BOOST */
1495 1521
1496 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) 1522 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1497 { 1523 {
1498 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1524 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1499 } 1525 }
1500 1526
1501 static void invoke_rcu_callbacks_kthread(void) 1527 static void invoke_rcu_callbacks_kthread(void)
1502 { 1528 {
1503 WARN_ON_ONCE(1); 1529 WARN_ON_ONCE(1);
1504 } 1530 }
1505 1531
1506 static bool rcu_is_callbacks_kthread(void) 1532 static bool rcu_is_callbacks_kthread(void)
1507 { 1533 {
1508 return false; 1534 return false;
1509 } 1535 }
1510 1536
1511 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) 1537 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1512 { 1538 {
1513 } 1539 }
1514 1540
1515 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) 1541 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1516 { 1542 {
1517 } 1543 }
1518 1544
1519 static int __init rcu_scheduler_really_started(void) 1545 static int __init rcu_scheduler_really_started(void)
1520 { 1546 {
1521 rcu_scheduler_fully_active = 1; 1547 rcu_scheduler_fully_active = 1;
1522 return 0; 1548 return 0;
1523 } 1549 }
1524 early_initcall(rcu_scheduler_really_started); 1550 early_initcall(rcu_scheduler_really_started);
1525 1551
1526 static void __cpuinit rcu_prepare_kthreads(int cpu) 1552 static void __cpuinit rcu_prepare_kthreads(int cpu)
1527 { 1553 {
1528 } 1554 }
1529 1555
1530 #endif /* #else #ifdef CONFIG_RCU_BOOST */ 1556 #endif /* #else #ifdef CONFIG_RCU_BOOST */
1531 1557
1532 #if !defined(CONFIG_RCU_FAST_NO_HZ) 1558 #if !defined(CONFIG_RCU_FAST_NO_HZ)
1533 1559
1534 /* 1560 /*
1535 * Check to see if any future RCU-related work will need to be done 1561 * Check to see if any future RCU-related work will need to be done
1536 * by the current CPU, even if none need be done immediately, returning 1562 * by the current CPU, even if none need be done immediately, returning
1537 * 1 if so. This function is part of the RCU implementation; it is -not- 1563 * 1 if so. This function is part of the RCU implementation; it is -not-
1538 * an exported member of the RCU API. 1564 * an exported member of the RCU API.
1539 * 1565 *
1540 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1566 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
1541 * any flavor of RCU. 1567 * any flavor of RCU.
1542 */ 1568 */
1543 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) 1569 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
1544 { 1570 {
1545 *delta_jiffies = ULONG_MAX; 1571 *delta_jiffies = ULONG_MAX;
1546 return rcu_cpu_has_callbacks(cpu, NULL); 1572 return rcu_cpu_has_callbacks(cpu, NULL);
1547 } 1573 }
1548 1574
1549 /* 1575 /*
1550 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up 1576 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
1551 * after it. 1577 * after it.
1552 */ 1578 */
1553 static void rcu_cleanup_after_idle(int cpu) 1579 static void rcu_cleanup_after_idle(int cpu)
1554 { 1580 {
1555 } 1581 }
1556 1582
1557 /* 1583 /*
1558 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, 1584 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1559 * is nothing. 1585 * is nothing.
1560 */ 1586 */
1561 static void rcu_prepare_for_idle(int cpu) 1587 static void rcu_prepare_for_idle(int cpu)
1562 { 1588 {
1563 } 1589 }
1564 1590
1565 /* 1591 /*
1566 * Don't bother keeping a running count of the number of RCU callbacks 1592 * Don't bother keeping a running count of the number of RCU callbacks
1567 * posted because CONFIG_RCU_FAST_NO_HZ=n. 1593 * posted because CONFIG_RCU_FAST_NO_HZ=n.
1568 */ 1594 */
1569 static void rcu_idle_count_callbacks_posted(void) 1595 static void rcu_idle_count_callbacks_posted(void)
1570 { 1596 {
1571 } 1597 }
1572 1598
1573 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1599 #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1574 1600
1575 /* 1601 /*
1576 * This code is invoked when a CPU goes idle, at which point we want 1602 * This code is invoked when a CPU goes idle, at which point we want
1577 * to have the CPU do everything required for RCU so that it can enter 1603 * to have the CPU do everything required for RCU so that it can enter
1578 * the energy-efficient dyntick-idle mode. This is handled by a 1604 * the energy-efficient dyntick-idle mode. This is handled by a
1579 * state machine implemented by rcu_prepare_for_idle() below. 1605 * state machine implemented by rcu_prepare_for_idle() below.
1580 * 1606 *
1581 * The following two preprocessor symbols control this state machine: 1607 * The following two preprocessor symbols control this state machine:
1582 * 1608 *
1583 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted 1609 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
1584 * to sleep in dyntick-idle mode with RCU callbacks pending. This 1610 * to sleep in dyntick-idle mode with RCU callbacks pending. This
1585 * is sized to be roughly one RCU grace period. Those energy-efficiency 1611 * is sized to be roughly one RCU grace period. Those energy-efficiency
1586 * benchmarkers who might otherwise be tempted to set this to a large 1612 * benchmarkers who might otherwise be tempted to set this to a large
1587 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your 1613 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
1588 * system. And if you are -that- concerned about energy efficiency, 1614 * system. And if you are -that- concerned about energy efficiency,
1589 * just power the system down and be done with it! 1615 * just power the system down and be done with it!
1590 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is 1616 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
1591 * permitted to sleep in dyntick-idle mode with only lazy RCU 1617 * permitted to sleep in dyntick-idle mode with only lazy RCU
1592 * callbacks pending. Setting this too high can OOM your system. 1618 * callbacks pending. Setting this too high can OOM your system.
1593 * 1619 *
1594 * The values below work well in practice. If future workloads require 1620 * The values below work well in practice. If future workloads require
1595 * adjustment, they can be converted into kernel config parameters, though 1621 * adjustment, they can be converted into kernel config parameters, though
1596 * making the state machine smarter might be a better option. 1622 * making the state machine smarter might be a better option.
1597 */ 1623 */
1598 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ 1624 #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
1599 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ 1625 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
1600 1626
1601 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; 1627 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
1602 module_param(rcu_idle_gp_delay, int, 0644); 1628 module_param(rcu_idle_gp_delay, int, 0644);
1603 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; 1629 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1604 module_param(rcu_idle_lazy_gp_delay, int, 0644); 1630 module_param(rcu_idle_lazy_gp_delay, int, 0644);
1605 1631
1606 extern int tick_nohz_enabled; 1632 extern int tick_nohz_enabled;
1607 1633
1608 /* 1634 /*
1609 * Try to advance callbacks for all flavors of RCU on the current CPU. 1635 * Try to advance callbacks for all flavors of RCU on the current CPU.
1610 * Afterwards, if there are any callbacks ready for immediate invocation, 1636 * Afterwards, if there are any callbacks ready for immediate invocation,
1611 * return true. 1637 * return true.
1612 */ 1638 */
1613 static bool rcu_try_advance_all_cbs(void) 1639 static bool rcu_try_advance_all_cbs(void)
1614 { 1640 {
1615 bool cbs_ready = false; 1641 bool cbs_ready = false;
1616 struct rcu_data *rdp; 1642 struct rcu_data *rdp;
1617 struct rcu_node *rnp; 1643 struct rcu_node *rnp;
1618 struct rcu_state *rsp; 1644 struct rcu_state *rsp;
1619 1645
1620 for_each_rcu_flavor(rsp) { 1646 for_each_rcu_flavor(rsp) {
1621 rdp = this_cpu_ptr(rsp->rda); 1647 rdp = this_cpu_ptr(rsp->rda);
1622 rnp = rdp->mynode; 1648 rnp = rdp->mynode;
1623 1649
1624 /* 1650 /*
1625 * Don't bother checking unless a grace period has 1651 * Don't bother checking unless a grace period has
1626 * completed since we last checked and there are 1652 * completed since we last checked and there are
1627 * callbacks not yet ready to invoke. 1653 * callbacks not yet ready to invoke.
1628 */ 1654 */
1629 if (rdp->completed != rnp->completed && 1655 if (rdp->completed != rnp->completed &&
1630 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) 1656 rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
1631 rcu_process_gp_end(rsp, rdp); 1657 rcu_process_gp_end(rsp, rdp);
1632 1658
1633 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1659 if (cpu_has_callbacks_ready_to_invoke(rdp))
1634 cbs_ready = true; 1660 cbs_ready = true;
1635 } 1661 }
1636 return cbs_ready; 1662 return cbs_ready;
1637 } 1663 }
1638 1664
1639 /* 1665 /*
1640 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready 1666 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
1641 * to invoke. If the CPU has callbacks, try to advance them. Tell the 1667 * to invoke. If the CPU has callbacks, try to advance them. Tell the
1642 * caller to set the timeout based on whether or not there are non-lazy 1668 * caller to set the timeout based on whether or not there are non-lazy
1643 * callbacks. 1669 * callbacks.
1644 * 1670 *
1645 * The caller must have disabled interrupts. 1671 * The caller must have disabled interrupts.
1646 */ 1672 */
1647 int rcu_needs_cpu(int cpu, unsigned long *dj) 1673 int rcu_needs_cpu(int cpu, unsigned long *dj)
1648 { 1674 {
1649 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1675 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1650 1676
1651 /* Snapshot to detect later posting of non-lazy callback. */ 1677 /* Snapshot to detect later posting of non-lazy callback. */
1652 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1678 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
1653 1679
1654 /* If no callbacks, RCU doesn't need the CPU. */ 1680 /* If no callbacks, RCU doesn't need the CPU. */
1655 if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { 1681 if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
1656 *dj = ULONG_MAX; 1682 *dj = ULONG_MAX;
1657 return 0; 1683 return 0;
1658 } 1684 }
1659 1685
1660 /* Attempt to advance callbacks. */ 1686 /* Attempt to advance callbacks. */
1661 if (rcu_try_advance_all_cbs()) { 1687 if (rcu_try_advance_all_cbs()) {
1662 /* Some ready to invoke, so initiate later invocation. */ 1688 /* Some ready to invoke, so initiate later invocation. */
1663 invoke_rcu_core(); 1689 invoke_rcu_core();
1664 return 1; 1690 return 1;
1665 } 1691 }
1666 rdtp->last_accelerate = jiffies; 1692 rdtp->last_accelerate = jiffies;
1667 1693
1668 /* Request timer delay depending on laziness, and round. */ 1694 /* Request timer delay depending on laziness, and round. */
1669 if (!rdtp->all_lazy) { 1695 if (!rdtp->all_lazy) {
1670 *dj = round_up(rcu_idle_gp_delay + jiffies, 1696 *dj = round_up(rcu_idle_gp_delay + jiffies,
1671 rcu_idle_gp_delay) - jiffies; 1697 rcu_idle_gp_delay) - jiffies;
1672 } else { 1698 } else {
1673 *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; 1699 *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
1674 } 1700 }
1675 return 0; 1701 return 0;
1676 } 1702 }
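
As a rough illustration of the rounding above (a minimal userspace sketch, not part of this patch: round_up_pow2() stands in for the kernel's round_up() in the power-of-two case, and the jiffies values are made up), note how rounding each CPU's wakeup to a multiple of the delay batches the resulting timer expirations so that idle CPUs tend to wake together:

#include <stdio.h>

/* Power-of-two round-up, standing in for the kernel's round_up(). */
static unsigned long round_up_pow2(unsigned long x, unsigned long y)
{
	return (x + y - 1) & ~(y - 1);
}

int main(void)
{
	unsigned long gp_delay = 4;	/* RCU_IDLE_GP_DELAY */
	unsigned long now[] = { 1001, 1002, 1003, 1004 };	/* made-up jiffies */

	for (int i = 0; i < 4; i++) {
		unsigned long dj = round_up_pow2(now[i] + gp_delay, gp_delay)
				   - now[i];
		/* All four requests expire on the same 4-jiffy boundary. */
		printf("now=%lu -> sleep %lu jiffies (wake at %lu)\n",
		       now[i], dj, now[i] + dj);
	}
	return 0;
}
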
1677 1703
1678 /* 1704 /*
1679 * Prepare a CPU for idle from an RCU perspective. The first major task 1705 * Prepare a CPU for idle from an RCU perspective. The first major task
1680 * is to sense whether nohz mode has been enabled or disabled via sysfs. 1706 * is to sense whether nohz mode has been enabled or disabled via sysfs.
1681 * The second major task is to check to see if a non-lazy callback has 1707 * The second major task is to check to see if a non-lazy callback has
1682 * arrived at a CPU that previously had only lazy callbacks. The third 1708 * arrived at a CPU that previously had only lazy callbacks. The third
1683 * major task is to accelerate (that is, assign grace-period numbers to) 1709 * major task is to accelerate (that is, assign grace-period numbers to)
1684 * any recently arrived callbacks. 1710 * any recently arrived callbacks.
1685 * 1711 *
1686 * The caller must have disabled interrupts. 1712 * The caller must have disabled interrupts.
1687 */ 1713 */
1688 static void rcu_prepare_for_idle(int cpu) 1714 static void rcu_prepare_for_idle(int cpu)
1689 { 1715 {
1690 struct rcu_data *rdp; 1716 struct rcu_data *rdp;
1691 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1717 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1692 struct rcu_node *rnp; 1718 struct rcu_node *rnp;
1693 struct rcu_state *rsp; 1719 struct rcu_state *rsp;
1694 int tne; 1720 int tne;
1695 1721
1696 /* Handle nohz enablement switches conservatively. */ 1722 /* Handle nohz enablement switches conservatively. */
1697 tne = ACCESS_ONCE(tick_nohz_enabled); 1723 tne = ACCESS_ONCE(tick_nohz_enabled);
1698 if (tne != rdtp->tick_nohz_enabled_snap) { 1724 if (tne != rdtp->tick_nohz_enabled_snap) {
1699 if (rcu_cpu_has_callbacks(cpu, NULL)) 1725 if (rcu_cpu_has_callbacks(cpu, NULL))
1700 invoke_rcu_core(); /* force nohz to see update. */ 1726 invoke_rcu_core(); /* force nohz to see update. */
1701 rdtp->tick_nohz_enabled_snap = tne; 1727 rdtp->tick_nohz_enabled_snap = tne;
1702 return; 1728 return;
1703 } 1729 }
1704 if (!tne) 1730 if (!tne)
1705 return; 1731 return;
1706 1732
1707 /* If this is a no-CBs CPU, no callbacks, just return. */ 1733 /* If this is a no-CBs CPU, no callbacks, just return. */
1708 if (rcu_is_nocb_cpu(cpu)) 1734 if (rcu_is_nocb_cpu(cpu))
1709 return; 1735 return;
1710 1736
1711 /* 1737 /*
1712 * If a non-lazy callback arrived at a CPU having only lazy 1738 * If a non-lazy callback arrived at a CPU having only lazy
1713 * callbacks, invoke RCU core for the side-effect of recalculating 1739 * callbacks, invoke RCU core for the side-effect of recalculating
1714 * idle duration on re-entry to idle. 1740 * idle duration on re-entry to idle.
1715 */ 1741 */
1716 if (rdtp->all_lazy && 1742 if (rdtp->all_lazy &&
1717 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { 1743 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
1718 invoke_rcu_core(); 1744 invoke_rcu_core();
1719 return; 1745 return;
1720 } 1746 }
1721 1747
1722 /* 1748 /*
1723 * If we have not yet accelerated this jiffy, accelerate all 1749 * If we have not yet accelerated this jiffy, accelerate all
1724 * callbacks on this CPU. 1750 * callbacks on this CPU.
1725 */ 1751 */
1726 if (rdtp->last_accelerate == jiffies) 1752 if (rdtp->last_accelerate == jiffies)
1727 return; 1753 return;
1728 rdtp->last_accelerate = jiffies; 1754 rdtp->last_accelerate = jiffies;
1729 for_each_rcu_flavor(rsp) { 1755 for_each_rcu_flavor(rsp) {
1730 rdp = per_cpu_ptr(rsp->rda, cpu); 1756 rdp = per_cpu_ptr(rsp->rda, cpu);
1731 if (!*rdp->nxttail[RCU_DONE_TAIL]) 1757 if (!*rdp->nxttail[RCU_DONE_TAIL])
1732 continue; 1758 continue;
1733 rnp = rdp->mynode; 1759 rnp = rdp->mynode;
1734 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1760 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1735 rcu_accelerate_cbs(rsp, rnp, rdp); 1761 rcu_accelerate_cbs(rsp, rnp, rdp);
1736 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1762 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1737 } 1763 }
1738 } 1764 }
1739 1765
1740 /* 1766 /*
1741 * Clean up for exit from idle. Attempt to advance callbacks based on 1767 * Clean up for exit from idle. Attempt to advance callbacks based on
1742 * any grace periods that elapsed while the CPU was idle, and if any 1768 * any grace periods that elapsed while the CPU was idle, and if any
1743 * callbacks are now ready to invoke, initiate invocation. 1769 * callbacks are now ready to invoke, initiate invocation.
1744 */ 1770 */
1745 static void rcu_cleanup_after_idle(int cpu) 1771 static void rcu_cleanup_after_idle(int cpu)
1746 { 1772 {
1747 struct rcu_data *rdp; 1773 struct rcu_data *rdp;
1748 struct rcu_state *rsp; 1774 struct rcu_state *rsp;
1749 1775
1750 if (rcu_is_nocb_cpu(cpu)) 1776 if (rcu_is_nocb_cpu(cpu))
1751 return; 1777 return;
1752 rcu_try_advance_all_cbs(); 1778 rcu_try_advance_all_cbs();
1753 for_each_rcu_flavor(rsp) { 1779 for_each_rcu_flavor(rsp) {
1754 rdp = per_cpu_ptr(rsp->rda, cpu); 1780 rdp = per_cpu_ptr(rsp->rda, cpu);
1755 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1781 if (cpu_has_callbacks_ready_to_invoke(rdp))
1756 invoke_rcu_core(); 1782 invoke_rcu_core();
1757 } 1783 }
1758 } 1784 }
1759 1785
1760 /* 1786 /*
1761 * Keep a running count of the number of non-lazy callbacks posted 1787 * Keep a running count of the number of non-lazy callbacks posted
1762 * on this CPU. This running counter (which is never decremented) allows 1788 * on this CPU. This running counter (which is never decremented) allows
1763 * rcu_prepare_for_idle() to detect when something out of the idle loop 1789 * rcu_prepare_for_idle() to detect when something out of the idle loop
1764 * posts a callback, even if an equal number of callbacks are invoked. 1790 * posts a callback, even if an equal number of callbacks are invoked.
1765 * Of course, callbacks should only be posted from within a trace event 1791 * Of course, callbacks should only be posted from within a trace event
1766 * designed to be called from idle or from within RCU_NONIDLE(). 1792 * designed to be called from idle or from within RCU_NONIDLE().
1767 */ 1793 */
1768 static void rcu_idle_count_callbacks_posted(void) 1794 static void rcu_idle_count_callbacks_posted(void)
1769 { 1795 {
1770 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); 1796 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
1771 } 1797 }
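
The snapshot taken in rcu_needs_cpu() and rechecked in rcu_prepare_for_idle() turns this never-decremented counter into an edge detector for "a non-lazy callback was posted after idle entry". A minimal userspace sketch of that snapshot-compare idiom (illustrative only; the names mirror the ->nonlazy_posted fields but nothing here is kernel code):

#include <stdio.h>

static unsigned long nonlazy_posted;		/* only ever incremented */
static unsigned long nonlazy_posted_snap;	/* taken at idle entry */

static void post_nonlazy_callback(void)
{
	nonlazy_posted++;
}

int main(void)
{
	nonlazy_posted_snap = nonlazy_posted;	/* as in rcu_needs_cpu() */
	post_nonlazy_callback();		/* e.g. from RCU_NONIDLE() code */
	if (nonlazy_posted != nonlazy_posted_snap)	/* as in rcu_prepare_for_idle() */
		printf("non-lazy callback arrived since idle entry\n");
	return 0;
}
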
1772 1798
1773 /* 1799 /*
1774 * Data for flushing lazy RCU callbacks at OOM time. 1800 * Data for flushing lazy RCU callbacks at OOM time.
1775 */ 1801 */
1776 static atomic_t oom_callback_count; 1802 static atomic_t oom_callback_count;
1777 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq); 1803 static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
1778 1804
1779 /* 1805 /*
1780 * RCU OOM callback -- decrement the outstanding count and deliver the 1806 * RCU OOM callback -- decrement the outstanding count and deliver the
1781 * wake-up if we are the last one. 1807 * wake-up if we are the last one.
1782 */ 1808 */
1783 static void rcu_oom_callback(struct rcu_head *rhp) 1809 static void rcu_oom_callback(struct rcu_head *rhp)
1784 { 1810 {
1785 if (atomic_dec_and_test(&oom_callback_count)) 1811 if (atomic_dec_and_test(&oom_callback_count))
1786 wake_up(&oom_callback_wq); 1812 wake_up(&oom_callback_wq);
1787 } 1813 }
1788 1814
1789 /* 1815 /*
1790 * Post an rcu_oom_notify callback on the current CPU if it has at 1816 * Post an rcu_oom_notify callback on the current CPU if it has at
1791 * least one lazy callback. This will unnecessarily post callbacks 1817 * least one lazy callback. This will unnecessarily post callbacks
1792 * to CPUs that already have a non-lazy callback at the end of their 1818 * to CPUs that already have a non-lazy callback at the end of their
1793 * callback list, but this is an infrequent operation, so accept some 1819 * callback list, but this is an infrequent operation, so accept some
1794 * extra overhead to keep things simple. 1820 * extra overhead to keep things simple.
1795 */ 1821 */
1796 static void rcu_oom_notify_cpu(void *unused) 1822 static void rcu_oom_notify_cpu(void *unused)
1797 { 1823 {
1798 struct rcu_state *rsp; 1824 struct rcu_state *rsp;
1799 struct rcu_data *rdp; 1825 struct rcu_data *rdp;
1800 1826
1801 for_each_rcu_flavor(rsp) { 1827 for_each_rcu_flavor(rsp) {
1802 rdp = __this_cpu_ptr(rsp->rda); 1828 rdp = __this_cpu_ptr(rsp->rda);
1803 if (rdp->qlen_lazy != 0) { 1829 if (rdp->qlen_lazy != 0) {
1804 atomic_inc(&oom_callback_count); 1830 atomic_inc(&oom_callback_count);
1805 rsp->call(&rdp->oom_head, rcu_oom_callback); 1831 rsp->call(&rdp->oom_head, rcu_oom_callback);
1806 } 1832 }
1807 } 1833 }
1808 } 1834 }
1809 1835
1810 /* 1836 /*
1811 * If low on memory, ensure that each CPU has a non-lazy callback. 1837 * If low on memory, ensure that each CPU has a non-lazy callback.
1812 * This will wake up CPUs that have only lazy callbacks, in turn 1838 * This will wake up CPUs that have only lazy callbacks, in turn
1813 * ensuring that they free up the corresponding memory in a timely manner. 1839 * ensuring that they free up the corresponding memory in a timely manner.
1814 * Because an uncertain amount of memory will be freed in some uncertain 1840 * Because an uncertain amount of memory will be freed in some uncertain
1815 * timeframe, we do not claim to have freed anything. 1841 * timeframe, we do not claim to have freed anything.
1816 */ 1842 */
1817 static int rcu_oom_notify(struct notifier_block *self, 1843 static int rcu_oom_notify(struct notifier_block *self,
1818 unsigned long notused, void *nfreed) 1844 unsigned long notused, void *nfreed)
1819 { 1845 {
1820 int cpu; 1846 int cpu;
1821 1847
1822 /* Wait for callbacks from earlier instance to complete. */ 1848 /* Wait for callbacks from earlier instance to complete. */
1823 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0); 1849 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
1824 1850
1825 /* 1851 /*
1826 * Prevent premature wakeup: ensure that all increments happen 1852 * Prevent premature wakeup: ensure that all increments happen
1827 * before there is a chance of the counter reaching zero. 1853 * before there is a chance of the counter reaching zero.
1828 */ 1854 */
1829 atomic_set(&oom_callback_count, 1); 1855 atomic_set(&oom_callback_count, 1);
1830 1856
1831 get_online_cpus(); 1857 get_online_cpus();
1832 for_each_online_cpu(cpu) { 1858 for_each_online_cpu(cpu) {
1833 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1859 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1834 cond_resched(); 1860 cond_resched();
1835 } 1861 }
1836 put_online_cpus(); 1862 put_online_cpus();
1837 1863
1838 /* Unconditionally decrement: no need to wake ourselves up. */ 1864 /* Unconditionally decrement: no need to wake ourselves up. */
1839 atomic_dec(&oom_callback_count); 1865 atomic_dec(&oom_callback_count);
1840 1866
1841 return NOTIFY_OK; 1867 return NOTIFY_OK;
1842 } 1868 }
1843 1869
1844 static struct notifier_block rcu_oom_nb = { 1870 static struct notifier_block rcu_oom_nb = {
1845 .notifier_call = rcu_oom_notify 1871 .notifier_call = rcu_oom_notify
1846 }; 1872 };
1847 1873
1848 static int __init rcu_register_oom_notifier(void) 1874 static int __init rcu_register_oom_notifier(void)
1849 { 1875 {
1850 register_oom_notifier(&rcu_oom_nb); 1876 register_oom_notifier(&rcu_oom_nb);
1851 return 0; 1877 return 0;
1852 } 1878 }
1853 early_initcall(rcu_register_oom_notifier); 1879 early_initcall(rcu_register_oom_notifier);
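
The notifier above relies on biasing oom_callback_count to 1 so that the wait_event() cannot be satisfied until the final unconditional atomic_dec(), no matter how quickly individual callbacks complete. A single-threaded sketch of just that counting invariant (plain ints instead of atomic_t, no waitqueue; purely illustrative):

#include <stdio.h>

static int oom_callback_count;

/* Stands in for rcu_oom_notify_cpu(): one increment per posted callback. */
static void post_callback(void) { oom_callback_count++; }

/* Stands in for rcu_oom_callback(): decrement, report whether to wake the waiter. */
static int callback_done(void) { return --oom_callback_count == 0; }

int main(void)
{
	oom_callback_count = 1;			/* bias: cannot reach zero early */

	post_callback();			/* CPU 0 posts its callback... */
	if (callback_done())			/* ...which happens to run at once */
		printf("premature wakeup\n");	/* never fires, thanks to the bias */

	post_callback();			/* CPU 1 posts its callback */
	callback_done();			/* CPU 1's callback runs */

	if (callback_done())			/* final unconditional decrement */
		printf("all OOM callbacks finished; waiter proceeds\n");
	return 0;
}
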
1854 1880
1855 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1881 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1856 1882
1857 #ifdef CONFIG_RCU_CPU_STALL_INFO 1883 #ifdef CONFIG_RCU_CPU_STALL_INFO
1858 1884
1859 #ifdef CONFIG_RCU_FAST_NO_HZ 1885 #ifdef CONFIG_RCU_FAST_NO_HZ
1860 1886
1861 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1887 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1862 { 1888 {
1863 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1889 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
1864 unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; 1890 unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
1865 1891
1866 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", 1892 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
1867 rdtp->last_accelerate & 0xffff, jiffies & 0xffff, 1893 rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
1868 ulong2long(nlpd), 1894 ulong2long(nlpd),
1869 rdtp->all_lazy ? 'L' : '.', 1895 rdtp->all_lazy ? 'L' : '.',
1870 rdtp->tick_nohz_enabled_snap ? '.' : 'D'); 1896 rdtp->tick_nohz_enabled_snap ? '.' : 'D');
1871 } 1897 }
1872 1898
1873 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 1899 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
1874 1900
1875 static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1901 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1876 { 1902 {
1877 *cp = '\0'; 1903 *cp = '\0';
1878 } 1904 }
1879 1905
1880 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ 1906 #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
1881 1907
1882 /* Initiate the stall-info list. */ 1908 /* Initiate the stall-info list. */
1883 static void print_cpu_stall_info_begin(void) 1909 static void print_cpu_stall_info_begin(void)
1884 { 1910 {
1885 pr_cont("\n"); 1911 pr_cont("\n");
1886 } 1912 }
1887 1913
1888 /* 1914 /*
1889 * Print out diagnostic information for the specified stalled CPU. 1915 * Print out diagnostic information for the specified stalled CPU.
1890 * 1916 *
1891 * If the specified CPU is aware of the current RCU grace period 1917 * If the specified CPU is aware of the current RCU grace period
1892 * (flavor specified by rsp), then print the number of scheduling 1918 * (flavor specified by rsp), then print the number of scheduling
1893 * clock interrupts the CPU has taken during the time that it has 1919 * clock interrupts the CPU has taken during the time that it has
1894 * been aware. Otherwise, print the number of RCU grace periods 1920 * been aware. Otherwise, print the number of RCU grace periods
1895 * that this CPU is ignorant of, for example, "1" if the CPU was 1921 * that this CPU is ignorant of, for example, "1" if the CPU was
1896 * aware of the previous grace period. 1922 * aware of the previous grace period.
1897 * 1923 *
1898 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. 1924 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
1899 */ 1925 */
1900 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1926 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1901 { 1927 {
1902 char fast_no_hz[72]; 1928 char fast_no_hz[72];
1903 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1929 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1904 struct rcu_dynticks *rdtp = rdp->dynticks; 1930 struct rcu_dynticks *rdtp = rdp->dynticks;
1905 char *ticks_title; 1931 char *ticks_title;
1906 unsigned long ticks_value; 1932 unsigned long ticks_value;
1907 1933
1908 if (rsp->gpnum == rdp->gpnum) { 1934 if (rsp->gpnum == rdp->gpnum) {
1909 ticks_title = "ticks this GP"; 1935 ticks_title = "ticks this GP";
1910 ticks_value = rdp->ticks_this_gp; 1936 ticks_value = rdp->ticks_this_gp;
1911 } else { 1937 } else {
1912 ticks_title = "GPs behind"; 1938 ticks_title = "GPs behind";
1913 ticks_value = rsp->gpnum - rdp->gpnum; 1939 ticks_value = rsp->gpnum - rdp->gpnum;
1914 } 1940 }
1915 print_cpu_stall_fast_no_hz(fast_no_hz, cpu); 1941 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
1916 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", 1942 pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
1917 cpu, ticks_value, ticks_title, 1943 cpu, ticks_value, ticks_title,
1918 atomic_read(&rdtp->dynticks) & 0xfff, 1944 atomic_read(&rdtp->dynticks) & 0xfff,
1919 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1945 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
1920 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1946 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1921 fast_no_hz); 1947 fast_no_hz);
1922 } 1948 }
1923 1949
1924 /* Terminate the stall-info list. */ 1950 /* Terminate the stall-info list. */
1925 static void print_cpu_stall_info_end(void) 1951 static void print_cpu_stall_info_end(void)
1926 { 1952 {
1927 pr_err("\t"); 1953 pr_err("\t");
1928 } 1954 }
1929 1955
1930 /* Zero ->ticks_this_gp for all flavors of RCU. */ 1956 /* Zero ->ticks_this_gp for all flavors of RCU. */
1931 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1957 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1932 { 1958 {
1933 rdp->ticks_this_gp = 0; 1959 rdp->ticks_this_gp = 0;
1934 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); 1960 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
1935 } 1961 }
1936 1962
1937 /* Increment ->ticks_this_gp for all flavors of RCU. */ 1963 /* Increment ->ticks_this_gp for all flavors of RCU. */
1938 static void increment_cpu_stall_ticks(void) 1964 static void increment_cpu_stall_ticks(void)
1939 { 1965 {
1940 struct rcu_state *rsp; 1966 struct rcu_state *rsp;
1941 1967
1942 for_each_rcu_flavor(rsp) 1968 for_each_rcu_flavor(rsp)
1943 __this_cpu_ptr(rsp->rda)->ticks_this_gp++; 1969 __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
1944 } 1970 }
1945 1971
1946 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1972 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
1947 1973
1948 static void print_cpu_stall_info_begin(void) 1974 static void print_cpu_stall_info_begin(void)
1949 { 1975 {
1950 pr_cont(" {"); 1976 pr_cont(" {");
1951 } 1977 }
1952 1978
1953 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1979 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1954 { 1980 {
1955 pr_cont(" %d", cpu); 1981 pr_cont(" %d", cpu);
1956 } 1982 }
1957 1983
1958 static void print_cpu_stall_info_end(void) 1984 static void print_cpu_stall_info_end(void)
1959 { 1985 {
1960 pr_cont("} "); 1986 pr_cont("} ");
1961 } 1987 }
1962 1988
1963 static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1989 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1964 { 1990 {
1965 } 1991 }
1966 1992
1967 static void increment_cpu_stall_ticks(void) 1993 static void increment_cpu_stall_ticks(void)
1968 { 1994 {
1969 } 1995 }
1970 1996
1971 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */ 1997 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
1972 1998
1973 #ifdef CONFIG_RCU_NOCB_CPU 1999 #ifdef CONFIG_RCU_NOCB_CPU
1974 2000
1975 /* 2001 /*
1976 * Offload callback processing from the boot-time-specified set of CPUs 2002 * Offload callback processing from the boot-time-specified set of CPUs
1977 * specified by rcu_nocb_mask. For each CPU in the set, there is a 2003 * specified by rcu_nocb_mask. For each CPU in the set, there is a
1978 * kthread created that pulls the callbacks from the corresponding CPU, 2004 * kthread created that pulls the callbacks from the corresponding CPU,
1979 * waits for a grace period to elapse, and invokes the callbacks. 2005 * waits for a grace period to elapse, and invokes the callbacks.
1980 * The no-CBs CPUs do a wake_up() on their kthread when they insert 2006 * The no-CBs CPUs do a wake_up() on their kthread when they insert
1981 * a callback into any empty list, unless the rcu_nocb_poll boot parameter 2007 * a callback into any empty list, unless the rcu_nocb_poll boot parameter
1982 * has been specified, in which case each kthread actively polls its 2008 * has been specified, in which case each kthread actively polls its
1983 * CPU. (Which isn't so great for energy efficiency, but which does 2009 * CPU. (Which isn't so great for energy efficiency, but which does
1984 * reduce RCU's overhead on that CPU.) 2010 * reduce RCU's overhead on that CPU.)
1985 * 2011 *
1986 * This is intended to be used in conjunction with Frederic Weisbecker's 2012 * This is intended to be used in conjunction with Frederic Weisbecker's
1987 * adaptive-idle work, which would seriously reduce OS jitter on CPUs 2013 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
1988 * running CPU-bound user-mode computations. 2014 * running CPU-bound user-mode computations.
1989 * 2015 *
1990 * Offloading of callback processing could also in theory be used as 2016 * Offloading of callback processing could also in theory be used as
1991 * an energy-efficiency measure because CPUs with no RCU callbacks 2017 * an energy-efficiency measure because CPUs with no RCU callbacks
1992 * queued are more aggressive about entering dyntick-idle mode. 2018 * queued are more aggressive about entering dyntick-idle mode.
1993 */ 2019 */
1994 2020
1995 2021
1996 /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */ 2022 /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
1997 static int __init rcu_nocb_setup(char *str) 2023 static int __init rcu_nocb_setup(char *str)
1998 { 2024 {
1999 alloc_bootmem_cpumask_var(&rcu_nocb_mask); 2025 alloc_bootmem_cpumask_var(&rcu_nocb_mask);
2000 have_rcu_nocb_mask = true; 2026 have_rcu_nocb_mask = true;
2001 cpulist_parse(str, rcu_nocb_mask); 2027 cpulist_parse(str, rcu_nocb_mask);
2002 return 1; 2028 return 1;
2003 } 2029 }
2004 __setup("rcu_nocbs=", rcu_nocb_setup); 2030 __setup("rcu_nocbs=", rcu_nocb_setup);
2005 2031
2006 static int __init parse_rcu_nocb_poll(char *arg) 2032 static int __init parse_rcu_nocb_poll(char *arg)
2007 { 2033 {
2008 rcu_nocb_poll = 1; 2034 rcu_nocb_poll = 1;
2009 return 0; 2035 return 0;
2010 } 2036 }
2011 early_param("rcu_nocb_poll", parse_rcu_nocb_poll); 2037 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
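
As a usage illustration (an example command line, not part of this patch): booting with rcu_nocbs=1-7 offloads callback processing for CPUs 1-7 to their rcuo kthreads, and adding rcu_nocb_poll makes those kthreads poll their CPUs instead of waiting for wake-ups, reducing overhead on the offloaded CPUs at some cost in energy efficiency.
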
2012 2038
2013 /* 2039 /*
2014 * Do any no-CBs CPUs need another grace period? 2040 * Do any no-CBs CPUs need another grace period?
2015 * 2041 *
2016 * Interrupts must be disabled. If the caller does not hold the root 2042 * Interrupts must be disabled. If the caller does not hold the root
2017 * rcu_node structure's ->lock, the results are advisory only. 2043 * rcu_node structure's ->lock, the results are advisory only.
2018 */ 2044 */
2019 static int rcu_nocb_needs_gp(struct rcu_state *rsp) 2045 static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2020 { 2046 {
2021 struct rcu_node *rnp = rcu_get_root(rsp); 2047 struct rcu_node *rnp = rcu_get_root(rsp);
2022 2048
2023 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; 2049 return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
2024 } 2050 }
2025 2051
2026 /* 2052 /*
2027 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended 2053 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
2028 * grace period. 2054 * grace period.
2029 */ 2055 */
2030 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2056 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2031 { 2057 {
2032 wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); 2058 wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
2033 } 2059 }
2034 2060
2035 /* 2061 /*
2036 * Set the root rcu_node structure's ->need_future_gp field 2062 * Set the root rcu_node structure's ->need_future_gp field
2037 * based on the sum of those of all rcu_node structures. This does 2063 * based on the sum of those of all rcu_node structures. This does
2038 * double-count the root rcu_node structure's requests, but this 2064 * double-count the root rcu_node structure's requests, but this
2039 * is necessary to handle the possibility of a rcu_nocb_kthread() 2065 * is necessary to handle the possibility of a rcu_nocb_kthread()
2040 * having awakened during the time that the rcu_node structures 2066 * having awakened during the time that the rcu_node structures
2041 * were being updated for the end of the previous grace period. 2067 * were being updated for the end of the previous grace period.
2042 */ 2068 */
2043 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) 2069 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2044 { 2070 {
2045 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; 2071 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
2046 } 2072 }
2047 2073
2048 static void rcu_init_one_nocb(struct rcu_node *rnp) 2074 static void rcu_init_one_nocb(struct rcu_node *rnp)
2049 { 2075 {
2050 init_waitqueue_head(&rnp->nocb_gp_wq[0]); 2076 init_waitqueue_head(&rnp->nocb_gp_wq[0]);
2051 init_waitqueue_head(&rnp->nocb_gp_wq[1]); 2077 init_waitqueue_head(&rnp->nocb_gp_wq[1]);
2052 } 2078 }
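
The two wait queues initialized here are indexed by grace-period parity: rcu_nocb_needs_gp() and rcu_nocb_gp_set() look at slot (completed + 1) & 0x1 for the upcoming grace period, while rcu_nocb_gp_cleanup() wakes slot completed & 0x1 once that grace period has ended, so the two slots alternate roles. A minimal sketch of the indexing arithmetic (userspace, made-up grace-period numbers, illustrative only):

#include <stdio.h>

int main(void)
{
	unsigned long completed = 41;	/* hypothetical last completed GP */

	/* Waiters needing the next grace period park on slot (completed + 1) & 0x1. */
	printf("waiters for GP %lu use slot %lu\n",
	       completed + 1, (completed + 1) & 0x1);

	/* That grace period ends: cleanup wakes its slot, the other slot refills. */
	completed++;
	printf("GP %lu done: wake slot %lu; slot %lu now collects new waiters\n",
	       completed, completed & 0x1, (completed + 1) & 0x1);
	return 0;
}
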
2053 2079
2054 /* Is the specified CPU a no-CBs CPU? */ 2080 /* Is the specified CPU a no-CBs CPU? */
2055 bool rcu_is_nocb_cpu(int cpu) 2081 bool rcu_is_nocb_cpu(int cpu)
2056 { 2082 {
2057 if (have_rcu_nocb_mask) 2083 if (have_rcu_nocb_mask)
2058 return cpumask_test_cpu(cpu, rcu_nocb_mask); 2084 return cpumask_test_cpu(cpu, rcu_nocb_mask);
2059 return false; 2085 return false;
2060 } 2086 }
2061 2087
2062 /* 2088 /*
2063 * Enqueue the specified string of rcu_head structures onto the specified 2089 * Enqueue the specified string of rcu_head structures onto the specified
2064 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the 2090 * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
2065 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy 2091 * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
2066 * counts are supplied by rhcount and rhcount_lazy. 2092 * counts are supplied by rhcount and rhcount_lazy.
2067 * 2093 *
2068 * If warranted, also wake up the kthread servicing this CPU's queues. 2094 * If warranted, also wake up the kthread servicing this CPU's queues.
2069 */ 2095 */
2070 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, 2096 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2071 struct rcu_head *rhp, 2097 struct rcu_head *rhp,
2072 struct rcu_head **rhtp, 2098 struct rcu_head **rhtp,
2073 int rhcount, int rhcount_lazy) 2099 int rhcount, int rhcount_lazy)
2074 { 2100 {
2075 int len; 2101 int len;
2076 struct rcu_head **old_rhpp; 2102 struct rcu_head **old_rhpp;
2077 struct task_struct *t; 2103 struct task_struct *t;
2078 2104
2079 /* Enqueue the callback on the nocb list and update counts. */ 2105 /* Enqueue the callback on the nocb list and update counts. */
2080 old_rhpp = xchg(&rdp->nocb_tail, rhtp); 2106 old_rhpp = xchg(&rdp->nocb_tail, rhtp);
2081 ACCESS_ONCE(*old_rhpp) = rhp; 2107 ACCESS_ONCE(*old_rhpp) = rhp;
2082 atomic_long_add(rhcount, &rdp->nocb_q_count); 2108 atomic_long_add(rhcount, &rdp->nocb_q_count);
2083 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 2109 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2084 2110
2085 /* If we are not being polled and there is a kthread, awaken it ... */ 2111 /* If we are not being polled and there is a kthread, awaken it ... */
2086 t = ACCESS_ONCE(rdp->nocb_kthread); 2112 t = ACCESS_ONCE(rdp->nocb_kthread);
2087 if (rcu_nocb_poll | !t) 2113 if (rcu_nocb_poll | !t)
2088 return; 2114 return;
2089 len = atomic_long_read(&rdp->nocb_q_count); 2115 len = atomic_long_read(&rdp->nocb_q_count);
2090 if (old_rhpp == &rdp->nocb_head) { 2116 if (old_rhpp == &rdp->nocb_head) {
2091 wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ 2117 wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
2092 rdp->qlen_last_fqs_check = 0; 2118 rdp->qlen_last_fqs_check = 0;
2093 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 2119 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
2094 wake_up_process(t); /* ... or if many callbacks queued. */ 2120 wake_up_process(t); /* ... or if many callbacks queued. */
2095 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2121 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2096 } 2122 }
2097 return; 2123 return;
2098 } 2124 }
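
The enqueue above is lock-free: xchg() atomically repoints ->nocb_tail at the new element's ->next field, and only then is the element published through the old tail pointer, which is why rcu_nocb_kthread() below can briefly observe a NULL ->next and must wait for that store to land. A minimal single-threaded userspace analogue of the append (C11 atomics standing in for the kernel's xchg()/ACCESS_ONCE(); it shows only the pointer dance, not the concurrency):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	int id;
};

static struct cb *nocb_head;
/* Tail: address of the last ->next slot (or of nocb_head while empty). */
static _Atomic(struct cb **) nocb_tail = &nocb_head;

static void nocb_enqueue(struct cb *new)
{
	struct cb **old_tail;

	new->next = NULL;
	/* Claim the old tail slot atomically, then publish the new element. */
	old_tail = atomic_exchange(&nocb_tail, &new->next);
	*old_tail = new;	/* a consumer may spin briefly waiting for this store */
}

int main(void)
{
	struct cb a = { .id = 1 }, b = { .id = 2 };

	nocb_enqueue(&a);
	nocb_enqueue(&b);
	for (struct cb *p = nocb_head; p; p = p->next)
		printf("queued cb %d\n", p->id);
	return 0;
}
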
2099 2125
2100 /* 2126 /*
2101 * This is a helper for __call_rcu(), which invokes this when the normal 2127 * This is a helper for __call_rcu(), which invokes this when the normal
2102 * callback queue is inoperable. If this is not a no-CBs CPU, this 2128 * callback queue is inoperable. If this is not a no-CBs CPU, this
2103 * function returns failure back to __call_rcu(), which can complain 2129 * function returns failure back to __call_rcu(), which can complain
2104 * appropriately. 2130 * appropriately.
2105 * 2131 *
2106 * Otherwise, this function queues the callback where the corresponding 2132 * Otherwise, this function queues the callback where the corresponding
2107 * "rcuo" kthread can find it. 2133 * "rcuo" kthread can find it.
2108 */ 2134 */
2109 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2135 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2110 bool lazy) 2136 bool lazy)
2111 { 2137 {
2112 2138
2113 if (!rcu_is_nocb_cpu(rdp->cpu)) 2139 if (!rcu_is_nocb_cpu(rdp->cpu))
2114 return 0; 2140 return 0;
2115 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); 2141 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
2116 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2142 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2117 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2143 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
2118 (unsigned long)rhp->func, 2144 (unsigned long)rhp->func,
2119 rdp->qlen_lazy, rdp->qlen); 2145 rdp->qlen_lazy, rdp->qlen);
2120 else 2146 else
2121 trace_rcu_callback(rdp->rsp->name, rhp, 2147 trace_rcu_callback(rdp->rsp->name, rhp,
2122 rdp->qlen_lazy, rdp->qlen); 2148 rdp->qlen_lazy, rdp->qlen);
2123 return 1; 2149 return 1;
2124 } 2150 }
2125 2151
2126 /* 2152 /*
2127 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is 2153 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
2128 * not a no-CBs CPU. 2154 * not a no-CBs CPU.
2129 */ 2155 */
2130 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2156 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2131 struct rcu_data *rdp) 2157 struct rcu_data *rdp)
2132 { 2158 {
2133 long ql = rsp->qlen; 2159 long ql = rsp->qlen;
2134 long qll = rsp->qlen_lazy; 2160 long qll = rsp->qlen_lazy;
2135 2161
2136 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ 2162 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
2137 if (!rcu_is_nocb_cpu(smp_processor_id())) 2163 if (!rcu_is_nocb_cpu(smp_processor_id()))
2138 return 0; 2164 return 0;
2139 rsp->qlen = 0; 2165 rsp->qlen = 0;
2140 rsp->qlen_lazy = 0; 2166 rsp->qlen_lazy = 0;
2141 2167
2142 /* First, enqueue the donelist, if any. This preserves CB ordering. */ 2168 /* First, enqueue the donelist, if any. This preserves CB ordering. */
2143 if (rsp->orphan_donelist != NULL) { 2169 if (rsp->orphan_donelist != NULL) {
2144 __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist, 2170 __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
2145 rsp->orphan_donetail, ql, qll); 2171 rsp->orphan_donetail, ql, qll);
2146 ql = qll = 0; 2172 ql = qll = 0;
2147 rsp->orphan_donelist = NULL; 2173 rsp->orphan_donelist = NULL;
2148 rsp->orphan_donetail = &rsp->orphan_donelist; 2174 rsp->orphan_donetail = &rsp->orphan_donelist;
2149 } 2175 }
2150 if (rsp->orphan_nxtlist != NULL) { 2176 if (rsp->orphan_nxtlist != NULL) {
2151 __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist, 2177 __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
2152 rsp->orphan_nxttail, ql, qll); 2178 rsp->orphan_nxttail, ql, qll);
2153 ql = qll = 0; 2179 ql = qll = 0;
2154 rsp->orphan_nxtlist = NULL; 2180 rsp->orphan_nxtlist = NULL;
2155 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 2181 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2156 } 2182 }
2157 return 1; 2183 return 1;
2158 } 2184 }
2159 2185
2160 /* 2186 /*
2161 * If necessary, kick off a new grace period, and either way wait 2187 * If necessary, kick off a new grace period, and either way wait
2162 * for a subsequent grace period to complete. 2188 * for a subsequent grace period to complete.
2163 */ 2189 */
2164 static void rcu_nocb_wait_gp(struct rcu_data *rdp) 2190 static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2165 { 2191 {
2166 unsigned long c; 2192 unsigned long c;
2167 bool d; 2193 bool d;
2168 unsigned long flags; 2194 unsigned long flags;
2169 struct rcu_node *rnp = rdp->mynode; 2195 struct rcu_node *rnp = rdp->mynode;
2170 2196
2171 raw_spin_lock_irqsave(&rnp->lock, flags); 2197 raw_spin_lock_irqsave(&rnp->lock, flags);
2172 c = rcu_start_future_gp(rnp, rdp); 2198 c = rcu_start_future_gp(rnp, rdp);
2173 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2199 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2174 2200
2175 /* 2201 /*
2176 * Wait for the grace period. Do so interruptibly to avoid messing 2202 * Wait for the grace period. Do so interruptibly to avoid messing
2177 * up the load average. 2203 * up the load average.
2178 */ 2204 */
2179 trace_rcu_future_gp(rnp, rdp, c, "StartWait"); 2205 trace_rcu_future_gp(rnp, rdp, c, "StartWait");
2180 for (;;) { 2206 for (;;) {
2181 wait_event_interruptible( 2207 wait_event_interruptible(
2182 rnp->nocb_gp_wq[c & 0x1], 2208 rnp->nocb_gp_wq[c & 0x1],
2183 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2209 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
2184 if (likely(d)) 2210 if (likely(d))
2185 break; 2211 break;
2186 flush_signals(current); 2212 flush_signals(current);
2187 trace_rcu_future_gp(rnp, rdp, c, "ResumeWait"); 2213 trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
2188 } 2214 }
2189 trace_rcu_future_gp(rnp, rdp, c, "EndWait"); 2215 trace_rcu_future_gp(rnp, rdp, c, "EndWait");
2190 smp_mb(); /* Ensure that CB invocation happens after GP end. */ 2216 smp_mb(); /* Ensure that CB invocation happens after GP end. */
2191 } 2217 }
2192 2218
2193 /* 2219 /*
2194 * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes 2220 * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes
2195 * callbacks queued by the corresponding no-CBs CPU. 2221 * callbacks queued by the corresponding no-CBs CPU.
2196 */ 2222 */
2197 static int rcu_nocb_kthread(void *arg) 2223 static int rcu_nocb_kthread(void *arg)
2198 { 2224 {
2199 int c, cl; 2225 int c, cl;
2200 struct rcu_head *list; 2226 struct rcu_head *list;
2201 struct rcu_head *next; 2227 struct rcu_head *next;
2202 struct rcu_head **tail; 2228 struct rcu_head **tail;
2203 struct rcu_data *rdp = arg; 2229 struct rcu_data *rdp = arg;
2204 2230
2205 /* Each pass through this loop invokes one batch of callbacks */ 2231 /* Each pass through this loop invokes one batch of callbacks */
2206 for (;;) { 2232 for (;;) {
2207 /* If not polling, wait for next batch of callbacks. */ 2233 /* If not polling, wait for next batch of callbacks. */
2208 if (!rcu_nocb_poll) 2234 if (!rcu_nocb_poll)
2209 wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); 2235 wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
2210 list = ACCESS_ONCE(rdp->nocb_head); 2236 list = ACCESS_ONCE(rdp->nocb_head);
2211 if (!list) { 2237 if (!list) {
2212 schedule_timeout_interruptible(1); 2238 schedule_timeout_interruptible(1);
2213 flush_signals(current); 2239 flush_signals(current);
2214 continue; 2240 continue;
2215 } 2241 }
2216 2242
2217 /* 2243 /*
2218 * Extract queued callbacks, update counts, and wait 2244 * Extract queued callbacks, update counts, and wait
2219 * for a grace period to elapse. 2245 * for a grace period to elapse.
2220 */ 2246 */
2221 ACCESS_ONCE(rdp->nocb_head) = NULL; 2247 ACCESS_ONCE(rdp->nocb_head) = NULL;
2222 tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); 2248 tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
2223 c = atomic_long_xchg(&rdp->nocb_q_count, 0); 2249 c = atomic_long_xchg(&rdp->nocb_q_count, 0);
2224 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); 2250 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
2225 ACCESS_ONCE(rdp->nocb_p_count) += c; 2251 ACCESS_ONCE(rdp->nocb_p_count) += c;
2226 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; 2252 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
2227 rcu_nocb_wait_gp(rdp); 2253 rcu_nocb_wait_gp(rdp);
2228 2254
2229 /* Each pass through the following loop invokes a callback. */ 2255 /* Each pass through the following loop invokes a callback. */
2230 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); 2256 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
2231 c = cl = 0; 2257 c = cl = 0;
2232 while (list) { 2258 while (list) {
2233 next = list->next; 2259 next = list->next;
2234 /* Wait for enqueuing to complete, if needed. */ 2260 /* Wait for enqueuing to complete, if needed. */
2235 while (next == NULL && &list->next != tail) { 2261 while (next == NULL && &list->next != tail) {
2236 schedule_timeout_interruptible(1); 2262 schedule_timeout_interruptible(1);
2237 next = list->next; 2263 next = list->next;
2238 } 2264 }
2239 debug_rcu_head_unqueue(list); 2265 debug_rcu_head_unqueue(list);
2240 local_bh_disable(); 2266 local_bh_disable();
2241 if (__rcu_reclaim(rdp->rsp->name, list)) 2267 if (__rcu_reclaim(rdp->rsp->name, list))
2242 cl++; 2268 cl++;
2243 c++; 2269 c++;
2244 local_bh_enable(); 2270 local_bh_enable();
2245 list = next; 2271 list = next;
2246 } 2272 }
2247 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2273 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2248 ACCESS_ONCE(rdp->nocb_p_count) -= c; 2274 ACCESS_ONCE(rdp->nocb_p_count) -= c;
2249 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; 2275 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
2250 rdp->n_nocbs_invoked += c; 2276 rdp->n_nocbs_invoked += c;
2251 } 2277 }
2252 return 0; 2278 return 0;
2253 } 2279 }
2254 2280
2255 /* Initialize per-rcu_data variables for no-CBs CPUs. */ 2281 /* Initialize per-rcu_data variables for no-CBs CPUs. */
2256 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2282 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2257 { 2283 {
2258 rdp->nocb_tail = &rdp->nocb_head; 2284 rdp->nocb_tail = &rdp->nocb_head;
2259 init_waitqueue_head(&rdp->nocb_wq); 2285 init_waitqueue_head(&rdp->nocb_wq);
2260 } 2286 }
2261 2287
2262 /* Create a kthread for each RCU flavor for each no-CBs CPU. */ 2288 /* Create a kthread for each RCU flavor for each no-CBs CPU. */
2263 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2289 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2264 { 2290 {
2265 int cpu; 2291 int cpu;
2266 struct rcu_data *rdp; 2292 struct rcu_data *rdp;
2267 struct task_struct *t; 2293 struct task_struct *t;
2268 2294
2269 if (rcu_nocb_mask == NULL) 2295 if (rcu_nocb_mask == NULL)
2270 return; 2296 return;
2271 for_each_cpu(cpu, rcu_nocb_mask) { 2297 for_each_cpu(cpu, rcu_nocb_mask) {
2272 rdp = per_cpu_ptr(rsp->rda, cpu); 2298 rdp = per_cpu_ptr(rsp->rda, cpu);
2273 t = kthread_run(rcu_nocb_kthread, rdp, 2299 t = kthread_run(rcu_nocb_kthread, rdp,
2274 "rcuo%c/%d", rsp->abbr, cpu); 2300 "rcuo%c/%d", rsp->abbr, cpu);
2275 BUG_ON(IS_ERR(t)); 2301 BUG_ON(IS_ERR(t));
2276 ACCESS_ONCE(rdp->nocb_kthread) = t; 2302 ACCESS_ONCE(rdp->nocb_kthread) = t;
2277 } 2303 }
2278 } 2304 }
2279 2305
2280 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ 2306 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2281 static bool init_nocb_callback_list(struct rcu_data *rdp) 2307 static bool init_nocb_callback_list(struct rcu_data *rdp)
2282 { 2308 {
2283 if (rcu_nocb_mask == NULL || 2309 if (rcu_nocb_mask == NULL ||
2284 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) 2310 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
2285 return false; 2311 return false;
2286 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 2312 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2287 return true; 2313 return true;
2288 } 2314 }
2289 2315
2290 #else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2316 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
2291 2317
2292 static int rcu_nocb_needs_gp(struct rcu_state *rsp) 2318 static int rcu_nocb_needs_gp(struct rcu_state *rsp)
2293 { 2319 {
2294 return 0; 2320 return 0;
2295 } 2321 }
2296 2322
2297 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 2323 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
2298 { 2324 {
2299 } 2325 }
2300 2326
2301 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) 2327 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2302 { 2328 {
2303 } 2329 }
2304 2330
2305 static void rcu_init_one_nocb(struct rcu_node *rnp) 2331 static void rcu_init_one_nocb(struct rcu_node *rnp)
2306 { 2332 {
2307 } 2333 }
2308 2334
2309 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2335 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2310 bool lazy) 2336 bool lazy)
2311 { 2337 {
2312 return 0; 2338 return 0;
2313 } 2339 }
2314 2340
2315 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2341 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2316 struct rcu_data *rdp) 2342 struct rcu_data *rdp)
2317 { 2343 {
2318 return 0; 2344 return 0;
2319 } 2345 }
2320 2346
2321 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2347 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2322 { 2348 {
2323 } 2349 }
2324 2350
2325 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2351 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2326 { 2352 {
2327 } 2353 }
2328 2354
2329 static bool init_nocb_callback_list(struct rcu_data *rdp) 2355 static bool init_nocb_callback_list(struct rcu_data *rdp)
2330 { 2356 {
2331 return false; 2357 return false;
2332 } 2358 }
2333 2359
2334 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 2360 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
2335 2361
2336 /* 2362 /*
2337 * An adaptive-ticks CPU can potentially execute in kernel mode for an 2363 * An adaptive-ticks CPU can potentially execute in kernel mode for an
2338 * arbitrarily long period of time with the scheduling-clock tick turned 2364 * arbitrarily long period of time with the scheduling-clock tick turned
2339 * off. RCU will be paying attention to this CPU because it is in the 2365 * off. RCU will be paying attention to this CPU because it is in the
2340 * kernel, but the CPU cannot be guaranteed to be executing the RCU state 2366 * kernel, but the CPU cannot be guaranteed to be executing the RCU state
2341 * machine because the scheduling-clock tick has been disabled. Therefore, 2367 * machine because the scheduling-clock tick has been disabled. Therefore,
2342 * if an adaptive-ticks CPU is failing to respond to the current grace 2368 * if an adaptive-ticks CPU is failing to respond to the current grace
2343 * period and has not been idle from an RCU perspective, kick it. 2369 * period and has not been idle from an RCU perspective, kick it.
2344 */ 2370 */
2345 static void rcu_kick_nohz_cpu(int cpu) 2371 static void rcu_kick_nohz_cpu(int cpu)
2346 { 2372 {
2347 #ifdef CONFIG_NO_HZ_FULL 2373 #ifdef CONFIG_NO_HZ_FULL
2348 if (tick_nohz_full_cpu(cpu)) 2374 if (tick_nohz_full_cpu(cpu))
2349 smp_send_reschedule(cpu); 2375 smp_send_reschedule(cpu);
2350 #endif /* #ifdef CONFIG_NO_HZ_FULL */ 2376 #endif /* #ifdef CONFIG_NO_HZ_FULL */
2351 } 2377 }
2352 2378