lib/percpu-refcount.c

  #define pr_fmt(fmt) "%s: " fmt "\n", __func__
  
  #include <linux/kernel.h>
  #include <linux/sched.h>
  #include <linux/wait.h>
  #include <linux/percpu-refcount.h>
  
  /*
   * Initially, a percpu refcount is just a set of percpu counters; we don't
   * try to detect the ref hitting 0, which means that get/put can just
   * increment or decrement the local counter. Note that the counter on a
   * particular cpu can (and will) wrap - this is fine; when we go to shut
   * down, the percpu counters will all sum to the correct value.
   *
   * (More precisely: because modular arithmetic is commutative, the sum of
   * all the percpu_count vars will be equal to what it would have been if
   * all the gets and puts were done to a single integer, even if some of the
   * percpu integers overflow or underflow).
   *
   * The real trick to implementing percpu refcounts is shutdown. We can't
   * detect the ref hitting 0 on every put - this would require global
   * synchronization and defeat the whole purpose of using percpu refs.
   *
   * What we do is require the user to keep track of the initial refcount; we
   * know the ref can't hit 0 before the user drops the initial ref, so as
   * long as we convert to non-percpu mode before the initial ref is dropped
   * everything works.
   *
   * Converting to non-percpu mode is done with some RCUish stuff in
   * percpu_ref_kill. Additionally, we need a bias value so that the
   * atomic_long_t can't hit 0 before we've added up all the percpu refs.
   */
  #define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
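
  /*
   * Editorial illustration (not part of the upstream file): a worked example
   * of the modular-arithmetic argument above.  Suppose cpu0 sees 3 gets and
   * cpu1 sees 2 puts.  cpu1's counter wraps to ULONG_MAX - 1, but the
   * truncated sum 3 + (ULONG_MAX - 1) == 1 (mod 2^BITS_PER_LONG), which is
   * exactly gets - puts.  PERCPU_COUNT_BIAS above is kept in the atomic
   * counter while the ref is in percpu mode, so the atomic_long_t stays far
   * above 0 until the percpu sum has been folded in and the bias removed.
   */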

  static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
  static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
  {
  	return (unsigned long __percpu *)
  		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
  }
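
  /*
   * Editorial note (not part of the upstream file): the low bits of
   * ->percpu_count_ptr double as state flags.  percpu_ref_init() below
   * allocates the percpu counter with an alignment of at least
   * 1 << __PERCPU_REF_FLAG_BITS, so the __PERCPU_REF_ATOMIC and
   * __PERCPU_REF_DEAD bits are always clear in the real address and masking
   * them off, as the helper above does, recovers the percpu pointer.
   */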
  /**
   * percpu_ref_init - initialize a percpu refcount
   * @ref: percpu_ref to initialize
   * @release: function which will be called when refcount hits 0
   * @flags: PERCPU_REF_INIT_* flags
   * @gfp: allocation mask to use
   *
   * Initializes @ref.  If @flags is zero, @ref starts in percpu mode with a
   * refcount of 1; analogous to atomic_long_set(ref, 1).  See the
   * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
   *
   * Note that @release must not sleep - it may potentially be called from RCU
   * callback context by percpu_ref_kill().
   */
  int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
  		    unsigned int flags, gfp_t gfp)
  {
  	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
  			     __alignof__(unsigned long));
  	unsigned long start_count = 0;

  	ref->percpu_count_ptr = (unsigned long)
  		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
  	if (!ref->percpu_count_ptr)
  		return -ENOMEM;
  	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
  	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
  		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
  	else
  		start_count += PERCPU_COUNT_BIAS;
  
  	if (flags & PERCPU_REF_INIT_DEAD)
  		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
  	else
  		start_count++;
  
  	atomic_long_set(&ref->count, start_count);
  	ref->release = release;
  	return 0;
  }
  EXPORT_SYMBOL_GPL(percpu_ref_init);
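
  /*
   * Editorial example (not part of the upstream file): a minimal sketch of
   * the intended usage.  "struct foo" and foo_release() are hypothetical.
   *
   *	struct foo {
   *		struct percpu_ref ref;
   *	};
   *
   *	The release callback must not sleep (kfree() is fine):
   *
   *	static void foo_release(struct percpu_ref *ref)
   *	{
   *		struct foo *foo = container_of(ref, struct foo, ref);
   *
   *		kfree(foo);
   *	}
   *
   *	Creation takes the initial ref in percpu mode:
   *		if (percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL))
   *			return -ENOMEM;
   *
   *	Hot paths use cheap percpu operations:
   *		percpu_ref_get(&foo->ref);
   *		percpu_ref_put(&foo->ref);
   *
   *	Teardown drops the initial ref; foo_release() runs once the count
   *	reaches zero after the implicit switch to atomic mode:
   *		percpu_ref_kill(&foo->ref);
   */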

  /**
   * percpu_ref_exit - undo percpu_ref_init()
   * @ref: percpu_ref to exit
   *
   * This function exits @ref.  The caller is responsible for ensuring that
   * @ref is no longer in active use.  The usual places to invoke this
   * function from are the @ref->release() callback or the init failure path
   * where percpu_ref_init() succeeded but other parts of the initialization
   * of the embedding object failed.
   */
  void percpu_ref_exit(struct percpu_ref *ref)
  {
  	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

  	if (percpu_count) {
  		free_percpu(percpu_count);
  		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
  	}
  }
  EXPORT_SYMBOL_GPL(percpu_ref_exit);
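
  /*
   * Editorial example (not part of the upstream file): the init-failure path
   * mentioned above.  foo_setup_rest() is hypothetical; no ref is dropped
   * before percpu_ref_exit() here because the object never became visible
   * to anyone else.
   *
   *	ret = percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
   *	if (ret)
   *		return ret;
   *
   *	ret = foo_setup_rest(foo);
   *	if (ret) {
   *		percpu_ref_exit(&foo->ref);
   *		return ret;
   *	}
   */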

  static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
  {
  	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
  
  	ref->confirm_switch(ref);
  	ref->confirm_switch = NULL;
  	wake_up_all(&percpu_ref_switch_waitq);
  
  	/* drop ref from percpu_ref_switch_to_atomic() */
  	percpu_ref_put(ref);
  }
  
  static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
  {
  	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
  	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
  	unsigned long count = 0;
  	int cpu;
  	for_each_possible_cpu(cpu)
  		count += *per_cpu_ptr(percpu_count, cpu);

  	pr_debug("global %ld percpu %ld",
  		 atomic_long_read(&ref->count), (long)count);
  
  	/*
  	 * It's crucial that we sum the percpu counters _before_ adding the sum
  	 * to &ref->count; since gets could be happening on one cpu while puts
  	 * happen on another, adding a single cpu's count could cause
  	 * @ref->count to hit 0 before we've got a consistent value - but the
  	 * sum of all the counts will be consistent and correct.
  	 *
  	 * Subtracting the bias value then has to happen _after_ adding count to
  	 * &ref->count; we need the bias value to prevent &ref->count from
  	 * reaching 0 before we add the percpu counts. But doing it at the same
  	 * time is equivalent and saves us atomic operations:
  	 */
  	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

  	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
  		  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
  		  ref->release, atomic_long_read(&ref->count));

  	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
  	percpu_ref_call_confirm_rcu(rcu);
  }

  static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
  {
  }
  
  static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
  					  percpu_ref_func_t *confirm_switch)
  {
  	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
  		/* switching from percpu to atomic */
  		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
  
  		/*
  		 * Non-NULL ->confirm_switch is used to indicate that
  		 * switching is in progress.  Use noop one if unspecified.
  		 */
  		WARN_ON_ONCE(ref->confirm_switch);
  		ref->confirm_switch =
  			confirm_switch ?: percpu_ref_noop_confirm_switch;
  
  		percpu_ref_get(ref);	/* put after confirmation */
  		call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
  	} else if (confirm_switch) {
  		/*
  		 * Somebody already set ATOMIC.  Switching may still be in
  		 * progress.  @confirm_switch must be invoked after the
  		 * switching is complete and a full sched RCU grace period
  		 * has passed.  Wait synchronously for the previous
  		 * switching and schedule @confirm_switch invocation.
  		 */
  		wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
  		ref->confirm_switch = confirm_switch;
  
  		percpu_ref_get(ref);	/* put after confirmation */
  		call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
  	}
  }
  
  /**
   * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
   * @ref: percpu_ref to switch to atomic mode
   * @confirm_switch: optional confirmation callback
   *
   * There's no reason to use this function for the usual reference counting.
   * Use percpu_ref_kill[_and_confirm]().
   *
   * Schedule switching of @ref to atomic mode.  All its percpu counts will
   * be collected to the main atomic counter.  On completion, when all CPUs
   * are guaranteed to be in atomic mode, @confirm_switch, which may not
   * block, is invoked.  This function may be invoked concurrently with all
   * the get/put operations and can safely be mixed with kill and reinit
   * operations.  Note that @ref will stay in atomic mode across kill/reinit
   * cycles until percpu_ref_switch_to_percpu() is called.
   *
   * This function normally doesn't block and can be called from any context
   * but it may block if @confirm_switch is specified and @ref is already in
   * the process of switching to atomic mode.  In such cases, @confirm_switch
   * will be invoked after the switching is complete.
   *
   * Due to the way percpu_ref is implemented, @confirm_switch will be called
   * after at least one full sched RCU grace period has passed but this is an
   * implementation detail and must not be depended upon.
   */
  void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
  				 percpu_ref_func_t *confirm_switch)
  {
  	ref->force_atomic = true;
  	__percpu_ref_switch_to_atomic(ref, confirm_switch);
  }

  static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  {
  	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
  	int cpu;
  	BUG_ON(!percpu_count);

  	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
  		return;
  
  	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
  
  	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
  
  	/*
  	 * Restore per-cpu operation.  smp_store_release() is paired with
  	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
  	 * that the zeroing is visible to all percpu accesses which can see
  	 * the following __PERCPU_REF_ATOMIC clearing.
  	 */
  	for_each_possible_cpu(cpu)
  		*per_cpu_ptr(percpu_count, cpu) = 0;

  	smp_store_release(&ref->percpu_count_ptr,
  			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
  }
  
  /**
   * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
   * @ref: percpu_ref to switch to percpu mode
   *
   * There's no reason to use this function for the usual reference counting.
   * To re-use an expired ref, use percpu_ref_reinit().
   *
   * Switch @ref to percpu mode.  This function may be invoked concurrently
   * with all the get/put operations and can safely be mixed with kill and
   * reinit operations.  This function reverses the sticky atomic state set
   * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
   * dying or dead, the actual switching takes place on the following
   * percpu_ref_reinit().
   *
   * This function normally doesn't block and can be called from any context
   * but it may block if @ref is in the process of switching to atomic mode
   * by percpu_ref_switch_to_atomic().
   */
  void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  {
  	ref->force_atomic = false;
  	/* a dying or dead ref can't be switched to percpu mode w/o reinit */
  	if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
  		__percpu_ref_switch_to_percpu(ref);
  }
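
  /*
   * Editorial example (not part of the upstream file): a sketch of
   * temporarily switching a live ref to atomic mode and back.  Because
   * percpu_ref_switch_to_atomic() only schedules the switch, a caller that
   * needs to know when it has finished would pass a @confirm_switch
   * callback (foo_switched() and foo->switch_done are hypothetical):
   *
   *	static void foo_switched(struct percpu_ref *ref)
   *	{
   *		complete(&container_of(ref, struct foo, ref)->switch_done);
   *	}
   *
   *	percpu_ref_switch_to_atomic(&foo->ref, foo_switched);
   *	wait_for_completion(&foo->switch_done);
   *	(gets/puts now hit the single atomic counter)
   *	percpu_ref_switch_to_percpu(&foo->ref);
   */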
  
  /**
   * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
   * @ref: percpu_ref to kill
   * @confirm_kill: optional confirmation callback
   *
   * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
   * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
   * called after @ref is seen as dead from all CPUs at which point all
   * further invocations of percpu_ref_tryget_live() will fail.  See
   * percpu_ref_tryget_live() for details.
   *
   * This function normally doesn't block and can be called from any context
   * but it may block if @confirm_kill is specified and @ref is in the
   * process of switching to atomic mode by percpu_ref_switch_to_atomic().
   *
   * Due to the way percpu_ref is implemented, @confirm_kill will be called
   * after at least one full sched RCU grace period has passed but this is an
   * implementation detail and must not be depended upon.
   */
  void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
  				 percpu_ref_func_t *confirm_kill)
  {
  	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
  		  "%s called more than once on %pf!", __func__, ref->release);
  
  	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
  	__percpu_ref_switch_to_atomic(ref, confirm_kill);
  	percpu_ref_put(ref);
  }
  EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
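
  /*
   * Editorial example (not part of the upstream file): a common shutdown
   * pattern built on the confirmation callback.  foo_confirm_kill() and
   * foo->confirm_done are hypothetical.
   *
   *	static void foo_confirm_kill(struct percpu_ref *ref)
   *	{
   *		struct foo *foo = container_of(ref, struct foo, ref);
   *
   *		complete(&foo->confirm_done);
   *	}
   *
   *	percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
   *	wait_for_completion(&foo->confirm_done);
   *
   *	After the wait, every CPU sees the ref as dead and
   *	percpu_ref_tryget_live() is guaranteed to fail; the release callback
   *	still runs only once the remaining references are dropped.
   */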
  
  /**
   * percpu_ref_reinit - re-initialize a percpu refcount
   * @ref: percpu_ref to re-initialize
   *
   * Re-initialize @ref so that it's in the same state as when it finished
   * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
   * initialized successfully and reached 0 but not exited.
   *
   * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
   * this function is in progress.
   */
  void percpu_ref_reinit(struct percpu_ref *ref)
  {
  	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
  
  	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
  	percpu_ref_get(ref);
  	if (!ref->force_atomic)
  		__percpu_ref_switch_to_percpu(ref);
  }
  EXPORT_SYMBOL_GPL(percpu_ref_reinit);
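
  /*
   * Editorial example (not part of the upstream file): a kill/reinit
   * "freeze/unfreeze" cycle (hypothetical names).  The ref must have reached
   * zero, typically signalled from the release callback, before
   * percpu_ref_reinit() is called.
   *
   *	freeze:
   *		percpu_ref_kill(&foo->ref);
   *		wait_for_completion(&foo->released);	(completed in @release)
   *
   *	unfreeze:
   *		percpu_ref_reinit(&foo->ref);
   */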