lib/percpu-refcount.c

// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters, and we
 * don't try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine; when we go to shutdown,
 * the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative, the sum of all
 * the percpu_count vars will be equal to what it would have been if all the
 * gets and puts were done to a single integer, even if some of the percpu
 * integers overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non-percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non-percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
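
/*
 * Worked example of the modular-arithmetic argument above, using
 * hypothetical 8-bit percpu counters for brevity: if CPU0 does 300 gets,
 * its counter wraps to 300 % 256 == 44; if CPU1 does 300 puts, its counter
 * wraps to -300 % 256 == 212.  The sum 44 + 212 == 256 == 0 (mod 256),
 * which is exactly the true net change in the refcount - the wraps cancel
 * out when the counters are summed at shutdown.
 */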

#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
	return (unsigned long __percpu *)
		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref.  If @flags is zero, @ref starts in percpu mode with a
 * refcount of 1; analogous to atomic_long_set(ref, 1).  See the
 * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
		    unsigned int flags, gfp_t gfp)
{
	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
			     __alignof__(unsigned long));
	unsigned long start_count = 0;

	ref->percpu_count_ptr = (unsigned long)
		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
	if (!ref->percpu_count_ptr)
		return -ENOMEM;

	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
	ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
		ref->allow_reinit = true;
	} else {
		start_count += PERCPU_COUNT_BIAS;
	}

	if (flags & PERCPU_REF_INIT_DEAD)
		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	else
		start_count++;

	atomic_long_set(&ref->count, start_count);

	ref->release = release;
	ref->confirm_switch = NULL;
	return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
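
/*
 * Illustrative usage sketch, not part of the kernel source: a typical user
 * embeds the ref in an object and frees the object from @release (the
 * names my_object and my_object_release below are hypothetical):
 *
 *	struct my_object {
 *		struct percpu_ref ref;
 *	};
 *
 *	static void my_object_release(struct percpu_ref *ref)
 *	{
 *		struct my_object *obj = container_of(ref, struct my_object, ref);
 *
 *		kfree(obj);	(must not sleep, see percpu_ref_init() above)
 *	}
 *
 *	ret = percpu_ref_init(&obj->ref, my_object_release, 0, GFP_KERNEL);
 *
 * After this, percpu_ref_get(&obj->ref) and percpu_ref_put(&obj->ref) are
 * cheap percpu operations until the ref is killed.
 */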

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref.  The caller is responsible for ensuring that
 * @ref is no longer in active use.  The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

	if (percpu_count) {
		/* non-NULL confirm_switch indicates switching in progress */
		WARN_ON_ONCE(ref->confirm_switch);
		free_percpu(percpu_count);
		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
	}
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
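
/*
 * Illustrative sketch of the init-failure path mentioned above (the helper
 * my_object_setup() is hypothetical):
 *
 *	ret = percpu_ref_init(&obj->ref, my_object_release, 0, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *	ret = my_object_setup(obj);
 *	if (ret)
 *		percpu_ref_exit(&obj->ref);	(undo only the ref init)
 *	return ret;
 */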

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);

	ref->confirm_switch(ref);
	ref->confirm_switch = NULL;
	wake_up_all(&percpu_ref_switch_waitq);

	if (!ref->allow_reinit)
		percpu_ref_exit(ref);

	/* drop ref from percpu_ref_switch_to_atomic() */
	percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	unsigned long count = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		count += *per_cpu_ptr(percpu_count, cpu);

	pr_debug("global %ld percpu %ld",
		 atomic_long_read(&ref->count), (long)count);

	/*
	 * It's crucial that we sum the percpu counters _before_ adding the sum
	 * to &ref->count; since gets could be happening on one cpu while puts
	 * happen on another, adding a single cpu's count could cause
	 * @ref->count to hit 0 before we've got a consistent value - but the
	 * sum of all the counts will be consistent and correct.
	 *
	 * Subtracting the bias value then has to happen _after_ adding count to
	 * &ref->count; we need the bias value to prevent &ref->count from
	 * reaching 0 before we add the percpu counts. But doing it at the same
	 * time is equivalent and saves us atomic operations:
	 */
	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
		  ref->release, atomic_long_read(&ref->count));

	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
	percpu_ref_call_confirm_rcu(rcu);
}
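
/*
 * Worked example of the bias arithmetic above (illustrative numbers): a
 * live percpu-mode ref starts with &ref->count == PERCPU_COUNT_BIAS + 1.
 * If the percpu counters sum to a net of S extra references at switch
 * time, the single atomic_long_add() above leaves
 *
 *	(PERCPU_COUNT_BIAS + 1) + (S - PERCPU_COUNT_BIAS) == S + 1
 *
 * outstanding references, and &ref->count can never be observed at 0 in
 * between because the bias is removed in the same atomic operation that
 * adds the percpu sum.
 */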

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
					  percpu_ref_func_t *confirm_switch)
{
	if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
		if (confirm_switch)
			confirm_switch(ref);
		return;
	}

	/* switching from percpu to atomic */
	ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

	/*
	 * Non-NULL ->confirm_switch is used to indicate that switching is
	 * in progress.  Use noop one if unspecified.
	 */
	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;

	percpu_ref_get(ref);	/* put after confirmation */
	call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
	int cpu;

	BUG_ON(!percpu_count);

	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
		return;

	if (WARN_ON_ONCE(!ref->allow_reinit))
		return;

	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);

	/*
	 * Restore per-cpu operation.  smp_store_release() is paired
	 * with READ_ONCE() in __ref_is_percpu() and guarantees that the
	 * zeroing is visible to all percpu accesses which can see the
	 * following __PERCPU_REF_ATOMIC clearing.
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(percpu_count, cpu) = 0;

	smp_store_release(&ref->percpu_count_ptr,
			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
				     percpu_ref_func_t *confirm_switch)
{
	lockdep_assert_held(&percpu_ref_switch_lock);

	/*
	 * If the previous ATOMIC switching hasn't finished yet, wait for
	 * its completion.  If the caller ensures that ATOMIC switching
	 * isn't in progress, this function can be called from any context.
	 */
	wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
			    percpu_ref_switch_lock);

	if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
		__percpu_ref_switch_to_atomic(ref, confirm_switch);
	else
		__percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode.  All its percpu counts will
 * be collected to the main atomic counter.  On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked.  This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations.  Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode.  If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = true;
	__percpu_ref_switch_mode(ref, confirm_switch);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete.  Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
	percpu_ref_switch_to_atomic(ref, NULL);
	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
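
/*
 * Illustrative sketch, assuming a hypothetical obj embedding a percpu_ref:
 * a caller that must observe a stable atomic count forces the switch and
 * waits for it to complete before proceeding:
 *
 *	percpu_ref_switch_to_atomic_sync(&obj->ref);
 *	(from here on, all gets/puts operate on the atomic counter)
 */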

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode.  This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations.  This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode.  If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	ref->force_atomic = false;
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
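
/*
 * Illustrative sketch (hypothetical obj): because atomic mode is sticky,
 * restoring the percpu fastpath after a temporary atomic phase must be
 * explicit:
 *
 *	percpu_ref_switch_to_atomic_sync(&obj->ref);
 *	(... slow-path work while in atomic mode ...)
 *	percpu_ref_switch_to_percpu(&obj->ref);
 */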

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail.  See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill)
{
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
		  "%s called more than once on %ps!", __func__, ref->release);

	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
	__percpu_ref_switch_mode(ref, confirm_kill);
	percpu_ref_put(ref);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
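
/*
 * Illustrative sketch (hypothetical my_confirm_kill and obj->kill_done):
 * @confirm_kill typically completes a waiter once no CPU can acquire new
 * percpu_ref_tryget_live() references:
 *
 *	static void my_confirm_kill(struct percpu_ref *ref)
 *	{
 *		struct my_object *obj = container_of(ref, struct my_object, ref);
 *
 *		complete(&obj->kill_done);	(may not block)
 *	}
 *
 *	percpu_ref_kill_and_confirm(&obj->ref, my_confirm_kill);
 *	wait_for_completion(&obj->kill_done);
 */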

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
	WARN_ON_ONCE(!percpu_ref_is_zero(ref));

	percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called.  @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	unsigned long flags;

	spin_lock_irqsave(&percpu_ref_switch_lock, flags);

	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;

	percpu_ref_get(ref);
	__percpu_ref_switch_mode(ref, NULL);

	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
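
/*
 * Illustrative sketch of a full kill/resurrect cycle (hypothetical obj and
 * obj->wq; the pattern resembles request-queue freezing): kill drops the
 * initial ref and fails further tryget_live() calls, and once the count
 * hits zero the ref can be brought back to live operation:
 *
 *	percpu_ref_kill(&obj->ref);
 *	wait_event(obj->wq, percpu_ref_is_zero(&obj->ref));
 *	(... everything is quiesced here ...)
 *	percpu_ref_resurrect(&obj->ref);
 */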