Commit 13cc56013842a847a0f6ff805d9ed9181e753ef8
Exists in smarc-imx_3.14.28_1.0.0_ga and in 1 other branch
Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull per-cpu changes from Tejun Heo:
 "This pull request contains Kent's per-cpu reference counter. It has
  gone through several iterations since the last time and the dynamic
  allocation is gone. The usual usage is relatively straightforward,
  although the async kill-confirm interface, which is not used in most
  cases, is somewhat icky. There also are some interface concerns -
  e.g. I'm not sure about passing in the @release callback during init,
  as that becomes funny when we later implement synchronous
  kill_and_drain - but nothing too serious and it's quite usable now.

  cgroup_subsys_state refcnting has already been converted and we
  should convert module refcnt (Kent?)"

* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu-refcount: use RCU-sched insted of normal RCU
  percpu-refcount: implement percpu_tryget() along with percpu_ref_kill_and_confirm()
  percpu-refcount: implement percpu_ref_cancel_init()
  percpu-refcount: add __must_check to percpu_ref_init() and don't use ACCESS_ONCE() in percpu_ref_kill_rcu()
  percpu-refcount: cosmetic updates
  percpu-refcount: consistently use plain (non-sched) RCU
  percpu-refcount: Don't use silly cmpxchg()
  percpu: implement generic percpu refcounting
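For orientation before the diff, here is a minimal sketch of how a subsystem might use the new API. struct my_obj, my_obj_release() and the create/destroy helpers are hypothetical names for illustration only; the percpu_ref_* calls are the ones added by this series.

#include <linux/percpu-refcount.h>
#include <linux/slab.h>

struct my_obj {                                 /* hypothetical object */
        struct percpu_ref       ref;
        /* ... payload ... */
};

static void my_obj_release(struct percpu_ref *ref)
{
        struct my_obj *obj = container_of(ref, struct my_obj, ref);

        kfree(obj);                             /* last reference dropped */
}

static struct my_obj *my_obj_create(void)
{
        struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;

        /* starts in percpu mode, holding the initial (caller's) ref */
        if (percpu_ref_init(&obj->ref, my_obj_release)) {
                kfree(obj);
                return NULL;
        }
        return obj;
}

/*
 * Fast paths just do percpu_ref_get(&obj->ref) / percpu_ref_put(&obj->ref);
 * while the ref is in percpu mode these only touch a per-cpu counter.
 */

static void my_obj_destroy(struct my_obj *obj)
{
        /*
         * Switch to atomic mode and drop the initial ref; my_obj_release()
         * runs (from RCU callback context) once all other refs are gone.
         */
        percpu_ref_kill(&obj->ref);
}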
Showing 3 changed files
include/linux/percpu-refcount.h
+/*
+ * Percpu refcounts:
+ * (C) 2012 Google, Inc.
+ * Author: Kent Overstreet <koverstreet@google.com>
+ *
+ * This implements a refcount with similar semantics to atomic_t - atomic_inc(),
+ * atomic_dec_and_test() - but percpu.
+ *
+ * There's one important difference between percpu refs and normal atomic_t
+ * refcounts; you have to keep track of your initial refcount, and then when you
+ * start shutting down you call percpu_ref_kill() _before_ dropping the initial
+ * refcount.
+ *
+ * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less
+ * than an atomic_t - this is because of the way shutdown works, see
+ * percpu_ref_kill()/PCPU_COUNT_BIAS.
+ *
+ * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
+ * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
+ * puts the ref back in single atomic_t mode, collecting the per cpu refs and
+ * issuing the appropriate barriers, and then marks the ref as shutting down so
+ * that percpu_ref_put() will check for the ref hitting 0. After it returns,
+ * it's safe to drop the initial ref.
+ *
+ * USAGE:
+ *
+ * See fs/aio.c for some example usage; it's used there for struct kioctx, which
+ * is created when userspace calls io_setup(), and destroyed when userspace
+ * calls io_destroy() or the process exits.
+ *
+ * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
+ * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
+ * the kioctx from the process's list of kioctxs - after that, there can't be
+ * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
+ * the initial ref with percpu_ref_put().
+ *
+ * Code that does a two stage shutdown like this often needs some kind of
+ * explicit synchronization to ensure the initial refcount can only be dropped
+ * once - percpu_ref_kill() must be called exactly once, and will WARN if it is
+ * called more than once. The aio code uses it this way, but it's not necessary
+ * if the code has some other mechanism to synchronize teardown.
+ */
+
+#ifndef _LINUX_PERCPU_REFCOUNT_H
+#define _LINUX_PERCPU_REFCOUNT_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+struct percpu_ref;
+typedef void (percpu_ref_func_t)(struct percpu_ref *);
+
+struct percpu_ref {
+        atomic_t                count;
+        /*
+         * The low bit of the pointer indicates whether the ref is in percpu
+         * mode; if set, then get/put will manipulate the atomic_t (this is a
+         * hack because we need to keep the pointer around for
+         * percpu_ref_kill_rcu())
+         */
+        unsigned __percpu       *pcpu_count;
+        percpu_ref_func_t       *release;
+        percpu_ref_func_t       *confirm_kill;
+        struct rcu_head         rcu;
+};
+
+int __must_check percpu_ref_init(struct percpu_ref *ref,
+                                 percpu_ref_func_t *release);
+void percpu_ref_cancel_init(struct percpu_ref *ref);
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_kill);
+
+/**
+ * percpu_ref_kill - drop the initial ref
+ * @ref: percpu_ref to kill
+ *
+ * Must be used to drop the initial ref on a percpu refcount; must be called
+ * precisely once before shutdown.
+ *
+ * Puts @ref in non percpu mode, then does a call_rcu_sched() before gathering
+ * up the percpu counters and dropping the initial ref.
+ */
+static inline void percpu_ref_kill(struct percpu_ref *ref)
+{
+        return percpu_ref_kill_and_confirm(ref, NULL);
+}
+
+#define PCPU_STATUS_BITS        2
+#define PCPU_STATUS_MASK        ((1 << PCPU_STATUS_BITS) - 1)
+#define PCPU_REF_PTR            0
+#define PCPU_REF_DEAD           1
+
+#define REF_STATUS(count)       (((unsigned long) count) & PCPU_STATUS_MASK)
+
+/**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analogous to atomic_inc().
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+        unsigned __percpu *pcpu_count;
+
+        rcu_read_lock_sched();
+
+        pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+        if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
+                __this_cpu_inc(*pcpu_count);
+        else
+                atomic_inc(&ref->count);
+
+        rcu_read_unlock_sched();
+}
+
+/**
+ * percpu_ref_tryget - try to increment a percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless it has already been killed. Returns
+ * %true on success; %false on failure.
+ *
+ * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget
+ * will fail. For such guarantee, percpu_ref_kill_and_confirm() should be
+ * used. After the confirm_kill callback is invoked, it's guaranteed that
+ * no new reference will be given out by percpu_ref_tryget().
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+        unsigned __percpu *pcpu_count;
+        int ret = false;
+
+        rcu_read_lock_sched();
+
+        pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+        if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
+                __this_cpu_inc(*pcpu_count);
+                ret = true;
+        }
+
+        rcu_read_unlock_sched();
+
+        return ret;
+}
+
+/**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+        unsigned __percpu *pcpu_count;
+
+        rcu_read_lock_sched();
+
+        pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+        if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
+                __this_cpu_dec(*pcpu_count);
+        else if (unlikely(atomic_dec_and_test(&ref->count)))
+                ref->release(ref);
+
+        rcu_read_unlock_sched();
+}
+
+#endif
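Continuing the hypothetical my_obj sketch from above, this is the lookup-side pattern the percpu_ref_tryget() comment describes: take a reference only if the ref has not been killed yet. my_obj_find() and the id parameter are illustrative assumptions; only percpu_ref_tryget(), percpu_ref_put() and the RCU calls are real APIs.

static struct my_obj *my_obj_lookup(unsigned long id)
{
        struct my_obj *obj;

        rcu_read_lock();
        obj = my_obj_find(id);                  /* hypothetical RCU-safe lookup */
        if (obj && !percpu_ref_tryget(&obj->ref))
                obj = NULL;                     /* ref already killed, object dying */
        rcu_read_unlock();

        return obj;                             /* caller must percpu_ref_put() */
}

As the tryget comment notes, a plain percpu_ref_kill() does not by itself guarantee that later trygets fail; code that needs that guarantee uses percpu_ref_kill_and_confirm(), sketched after the implementation below.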
lib/Makefile
@@ -13,7 +13,7 @@
 	sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	earlycpio.o
+	earlycpio.o percpu-refcount.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
lib/percpu-refcount.c
+#define pr_fmt(fmt) "%s: " fmt "\n", __func__
+
+#include <linux/kernel.h>
+#include <linux/percpu-refcount.h>
+
+/*
+ * Initially, a percpu refcount is just a set of percpu counters, and we don't
+ * try to detect the ref hitting 0 - which means that get/put can just
+ * increment or decrement the local counter. Note that the counter on a
+ * particular cpu can (and will) wrap - this is fine, when we go to shutdown the
+ * percpu counters will all sum to the correct value
+ *
+ * (More precisely: because modular arithmetic is commutative the sum of all the
+ * pcpu_count vars will be equal to what it would have been if all the gets and
+ * puts were done to a single integer, even if some of the percpu integers
+ * overflow or underflow).
+ *
+ * The real trick to implementing percpu refcounts is shutdown. We can't detect
+ * the ref hitting 0 on every put - this would require global synchronization
+ * and defeat the whole purpose of using percpu refs.
+ *
+ * What we do is require the user to keep track of the initial refcount; we know
+ * the ref can't hit 0 before the user drops the initial ref, so as long as we
+ * convert to non percpu mode before the initial ref is dropped everything
+ * works.
+ *
+ * Converting to non percpu mode is done with some RCUish stuff in
+ * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t
+ * can't hit 0 before we've added up all the percpu refs.
+ */
+
+#define PCPU_COUNT_BIAS         (1U << 31)
+
+/**
+ * percpu_ref_init - initialize a percpu refcount
+ * @ref: percpu_ref to initialize
+ * @release: function which will be called when refcount hits 0
+ *
+ * Initializes the refcount in single atomic counter mode with a refcount of 1;
+ * analogous to atomic_set(ref, 1).
+ *
+ * Note that @release must not sleep - it may potentially be called from RCU
+ * callback context by percpu_ref_kill().
+ */
+int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
+{
+        atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+
+        ref->pcpu_count = alloc_percpu(unsigned);
+        if (!ref->pcpu_count)
+                return -ENOMEM;
+
+        ref->release = release;
+        return 0;
+}
+
+/**
+ * percpu_ref_cancel_init - cancel percpu_ref_init()
+ * @ref: percpu_ref to cancel init for
+ *
+ * Once a percpu_ref is initialized, its destruction is initiated by
+ * percpu_ref_kill() and completes asynchronously, which can be painful to
+ * do when destroying a half-constructed object in init failure path.
+ *
+ * This function destroys @ref without invoking @ref->release and the
+ * memory area containing it can be freed immediately on return. To
+ * prevent accidental misuse, it's required that @ref has finished
+ * percpu_ref_init(), whether successful or not, but never used.
+ *
+ * The weird name and usage restriction are to prevent people from using
+ * this function by mistake for normal shutdown instead of
+ * percpu_ref_kill().
+ */
+void percpu_ref_cancel_init(struct percpu_ref *ref)
+{
+        unsigned __percpu *pcpu_count = ref->pcpu_count;
+        int cpu;
+
+        WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS);
+
+        if (pcpu_count) {
+                for_each_possible_cpu(cpu)
+                        WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu));
+                free_percpu(ref->pcpu_count);
+        }
+}
+
+static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+{
+        struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+        unsigned __percpu *pcpu_count = ref->pcpu_count;
+        unsigned count = 0;
+        int cpu;
+
+        /* Mask out PCPU_REF_DEAD */
+        pcpu_count = (unsigned __percpu *)
+                (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK);
+
+        for_each_possible_cpu(cpu)
+                count += *per_cpu_ptr(pcpu_count, cpu);
+
+        free_percpu(pcpu_count);
+
+        pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count);
+
+        /*
+         * It's crucial that we sum the percpu counters _before_ adding the sum
+         * to &ref->count; since gets could be happening on one cpu while puts
+         * happen on another, adding a single cpu's count could cause
+         * @ref->count to hit 0 before we've got a consistent value - but the
+         * sum of all the counts will be consistent and correct.
+         *
+         * Subtracting the bias value then has to happen _after_ adding count to
+         * &ref->count; we need the bias value to prevent &ref->count from
+         * reaching 0 before we add the percpu counts. But doing it at the same
+         * time is equivalent and saves us atomic operations:
+         */
+
+        atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count);
+
+        /* @ref is viewed as dead on all CPUs, send out kill confirmation */
+        if (ref->confirm_kill)
+                ref->confirm_kill(ref);
+
+        /*
+         * Now we're in single atomic_t mode with a consistent refcount, so it's
+         * safe to drop our initial ref:
+         */
+        percpu_ref_put(ref);
+}
+
+/**
+ * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
+ * @ref: percpu_ref to kill
+ * @confirm_kill: optional confirmation callback
+ *
+ * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
+ * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
+ * called after @ref is seen as dead from all CPUs - all further
+ * invocations of percpu_ref_tryget() will fail. See percpu_ref_tryget()
+ * for more details.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_kill will be called
+ * after at least one full RCU grace period has passed but this is an
+ * implementation detail and callers must not depend on it.
+ */
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_kill)
+{
+        WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD,
+                  "percpu_ref_kill() called more than once!\n");
+
+        ref->pcpu_count = (unsigned __percpu *)
+                (((unsigned long) ref->pcpu_count) | PCPU_REF_DEAD);
+        ref->confirm_kill = confirm_kill;
+
+        call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+}
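Finally, a hedged sketch of the asynchronous confirm interface (the part the pull request calls "somewhat icky"), continuing the hypothetical my_obj example: teardown kills the ref and defers the next stage to the confirmation callback, which per the comment above runs only once no CPU can still succeed in percpu_ref_tryget(). The free_work field and my_obj_shutdown() are illustrative assumptions (my_obj would need a struct work_struct member), not part of this commit.

static void my_obj_confirm_kill(struct percpu_ref *ref)
{
        struct my_obj *obj = container_of(ref, struct my_obj, ref);

        /*
         * Called from the RCU callback, so it must not block: punt the
         * rest of the teardown to a workqueue.  At this point no new
         * percpu_ref_tryget() can succeed.
         */
        schedule_work(&obj->free_work);         /* free_work is hypothetical */
}

static void my_obj_shutdown(struct my_obj *obj)
{
        /* kills the ref; my_obj_confirm_kill() runs after an RCU grace period */
        percpu_ref_kill_and_confirm(&obj->ref, my_obj_confirm_kill);
}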