lib/percpu-refcount.c

#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters; we don't try
 * to detect the ref hitting 0 - which means that get/put can just increment or
 * decrement the local counter. Note that the counter on a particular cpu can
 * (and will) wrap - this is fine; when we go to shut down, the percpu counters
 * will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative, the sum of all
 * the percpu_count vars will be equal to what it would have been if all the
 * gets and puts were done to a single integer, even if some of the percpu
 * integers overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non-percpu mode before the initial ref is dropped, everything
 * works.
 *
 * Converting to non-percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
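
/*
 * Illustrative usage sketch (hypothetical "struct foo" embedding a
 * percpu_ref; not part of this file): the embedding object holds the
 * initial ref, everything else uses get/put, and teardown begins by
 * dropping the initial ref with percpu_ref_kill():
 *
 *      static void foo_release(struct percpu_ref *ref)
 *      {
 *              struct foo *foo = container_of(ref, struct foo, ref);
 *
 *              kfree(foo);
 *      }
 *
 *      percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *      ...
 *      percpu_ref_get(&foo->ref);
 *      percpu_ref_put(&foo->ref);
 *      ...
 *      percpu_ref_kill(&foo->ref);
 */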

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a
 * refcount of 1; analogous to atomic_long_set(ref, 1). See the
 * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
	else
		start_count += PERCPU_COUNT_BIAS;

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&ref->count, start_count);

	ref->release = release;
	ref->confirm_switch = NULL;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
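
/*
 * Flag behavior sketch (the values follow from the code above; names other
 * than the PERCPU_REF_INIT_* flags are hypothetical):
 *
 *      percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *              live, percpu mode, count == PERCPU_COUNT_BIAS + 1
 *
 *      percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_INIT_ATOMIC,
 *                      GFP_KERNEL);
 *              live, atomic mode (sticky until percpu_ref_switch_to_percpu()),
 *              count == 1
 *
 *      percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_INIT_DEAD,
 *                      GFP_KERNEL);
 *              dead and atomic, count == 0; must be revived with
 *              percpu_ref_reinit() before use
 */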

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref. The caller is responsible for ensuring that
 * @ref is no longer in active use. The usual places to invoke this
 * function from are the @ref->release() callback or in init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
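
/*
 * Extending the hypothetical sketch from the top of the file: ->release()
 * is also the usual place to call percpu_ref_exit() before freeing the
 * embedding object:
 *
 *      static void foo_release(struct percpu_ref *ref)
 *      {
 *              struct foo *foo = container_of(ref, struct foo, ref);
 *
 *              percpu_ref_exit(&foo->ref);
 *              kfree(foo);
 *      }
 */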

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);

	ref->confirm_switch(ref);
	ref->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %ld percpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}
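
/*
 * Worked example of the bias arithmetic above, assuming BITS_PER_LONG == 64
 * so PERCPU_COUNT_BIAS == 2^63: a ref initialized with flags == 0 starts
 * with ref->count == 2^63 + 1.  Suppose that, while in percpu mode, CPU0 did
 * five more gets than puts (its counter reads 5) and CPU1 did three more
 * puts than gets (its counter wrapped to 2^64 - 3).  The truncated sum is 2,
 * and atomic_long_add(2 - 2^63) leaves ref->count == 3: the initial ref plus
 * the two outstanding gets, exactly as if a single counter had been used.
 */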

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress. Use noop one if unspecified.
	 */
	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);

	/*
	 * Restore per-cpu operation. smp_store_release() is paired with
	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
	 * that the zeroing is visible to all percpu accesses which can see
	 * the following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion. If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
			    percpu_ref_switch_lock);

	if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode. All its percpu counts will
 * be collected to the main atomic counter. On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked. This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations. Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete. Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
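
/*
 * Possible use of the _sync variant (a sketch, not taken from this file):
 * force every get/put onto the atomic counter before an extended quiescent
 * period, so that e.g. percpu_ref_is_zero() reports an exact answer, with
 * the caller guaranteeing that nothing switches back to percpu mode:
 *
 *      percpu_ref_switch_to_atomic_sync(&foo->ref);
 *      ...
 */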

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode. This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations. This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic(). If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode. If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
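
/*
 * Illustrative sequence (hypothetical): a ref created in atomic mode stays
 * sticky-atomic - kill/reinit cycles during setup then avoid the RCU grace
 * period in __percpu_ref_switch_to_atomic() - and is switched to percpu
 * mode only once the hot get/put paths are about to start:
 *
 *      percpu_ref_init(&foo->ref, foo_release, PERCPU_REF_INIT_ATOMIC,
 *                      GFP_KERNEL);
 *      ...
 *      percpu_ref_switch_to_percpu(&foo->ref);
 */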

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail. See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  "%s called more than once on %pf!", __func__, ref->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
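
/*
 * Sketch of a typical @confirm_kill pattern (hypothetical names): the
 * callback signals the killer once no further percpu_ref_tryget_live() can
 * succeed, and the killer then waits for the remaining refs to drain:
 *
 *      static void foo_confirm_kill(struct percpu_ref *ref)
 *      {
 *              struct foo *foo = container_of(ref, struct foo, ref);
 *
 *              complete(&foo->confirm_done);
 *      }
 *
 *      percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 *      wait_for_completion(&foo->confirm_done);
 */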

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD. @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);
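
/*
 * Sketch of a full kill/reinit cycle (hypothetical variant where
 * foo_release() merely signals &foo->released instead of freeing the
 * object): the ref must have reached zero, and must not have been exited,
 * before it is revived:
 *
 *      percpu_ref_kill(&foo->ref);
 *      wait_for_completion(&foo->released);
 *      ...
 *      percpu_ref_reinit(&foo->ref);
 */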