Commit 2a4fd070ee8561d918a3776388331bb7e92ea59e
Parent: db61367038
Exists in smarc-imx_3.14.28_1.0.0_ga and in 1 other branch
blkcg: move bulk of blkcg_gq release operations to the RCU callback
Currently, when the last reference of a blkcg_gq is put, all the release operations sans the actual freeing happen directly in blkg_put(). As blkg_put() may be called under queue_lock, all pd_exit_fn()s may be too. This makes it impossible for pd_exit_fn()s to use del_timer_sync() on timers which grab the queue_lock, which is an irq-safe lock, due to the deadlock possibility described in the comment on top of del_timer_sync().

This can be easily avoided by performing the release operations in the RCU callback instead of directly from blkg_put(). This patch moves the blkcg_gq release operations to the RCU callback. As this leaves __blkg_release() with only the call_rcu() invocation, blkg_rcu_free() is renamed to __blkg_release_rcu() and exported, and the call_rcu() invocation is now done directly from blkg_put() instead of going through __blkg_release(), which is removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
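The counterpart of this change lives in block/blk-cgroup.h, the second changed file, which is not shown in the excerpt below: blkg_put() now queues the RCU callback itself once the refcount drops to zero. A minimal sketch of what the header side ends up looking like after this patch, assuming the existing refcounting fields and lockdep annotations in that header:

/*
 * Sketch of the header-side counterpart (block/blk-cgroup.h): when the
 * last reference is dropped, blkg_put() invokes call_rcu() directly and
 * __blkg_release_rcu() performs the actual release work.
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
        lockdep_assert_held(blkg->q->queue_lock);
        WARN_ON_ONCE(blkg->refcnt <= 0);
        if (!--blkg->refcnt)
                call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}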
Showing 2 changed files with 18 additions and 20 deletions
Inline Diff
block/blk-cgroup.c
1 | /* | 1 | /* |
2 | * Common Block IO controller cgroup interface | 2 | * Common Block IO controller cgroup interface |
3 | * | 3 | * |
4 | * Based on ideas and code from CFQ, CFS and BFQ: | 4 | * Based on ideas and code from CFQ, CFS and BFQ: |
5 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | 5 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> |
6 | * | 6 | * |
7 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | 7 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> |
8 | * Paolo Valente <paolo.valente@unimore.it> | 8 | * Paolo Valente <paolo.valente@unimore.it> |
9 | * | 9 | * |
10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | 10 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> |
11 | * Nauman Rafique <nauman@google.com> | 11 | * Nauman Rafique <nauman@google.com> |
12 | */ | 12 | */ |
13 | #include <linux/ioprio.h> | 13 | #include <linux/ioprio.h> |
14 | #include <linux/kdev_t.h> | 14 | #include <linux/kdev_t.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/genhd.h> | 19 | #include <linux/genhd.h> |
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | #include <linux/atomic.h> | 21 | #include <linux/atomic.h> |
22 | #include "blk-cgroup.h" | 22 | #include "blk-cgroup.h" |
23 | #include "blk.h" | 23 | #include "blk.h" |
24 | 24 | ||
25 | #define MAX_KEY_LEN 100 | 25 | #define MAX_KEY_LEN 100 |
26 | 26 | ||
27 | static DEFINE_MUTEX(blkcg_pol_mutex); | 27 | static DEFINE_MUTEX(blkcg_pol_mutex); |
28 | 28 | ||
29 | struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, | 29 | struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, |
30 | .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; | 30 | .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; |
31 | EXPORT_SYMBOL_GPL(blkcg_root); | 31 | EXPORT_SYMBOL_GPL(blkcg_root); |
32 | 32 | ||
33 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 33 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
34 | 34 | ||
35 | static bool blkcg_policy_enabled(struct request_queue *q, | 35 | static bool blkcg_policy_enabled(struct request_queue *q, |
36 | const struct blkcg_policy *pol) | 36 | const struct blkcg_policy *pol) |
37 | { | 37 | { |
38 | return pol && test_bit(pol->plid, q->blkcg_pols); | 38 | return pol && test_bit(pol->plid, q->blkcg_pols); |
39 | } | 39 | } |
40 | 40 | ||
41 | /** | 41 | /** |
42 | * blkg_free - free a blkg | 42 | * blkg_free - free a blkg |
43 | * @blkg: blkg to free | 43 | * @blkg: blkg to free |
44 | * | 44 | * |
45 | * Free @blkg which may be partially allocated. | 45 | * Free @blkg which may be partially allocated. |
46 | */ | 46 | */ |
47 | static void blkg_free(struct blkcg_gq *blkg) | 47 | static void blkg_free(struct blkcg_gq *blkg) |
48 | { | 48 | { |
49 | int i; | 49 | int i; |
50 | 50 | ||
51 | if (!blkg) | 51 | if (!blkg) |
52 | return; | 52 | return; |
53 | 53 | ||
54 | for (i = 0; i < BLKCG_MAX_POLS; i++) | 54 | for (i = 0; i < BLKCG_MAX_POLS; i++) |
55 | kfree(blkg->pd[i]); | 55 | kfree(blkg->pd[i]); |
56 | 56 | ||
57 | blk_exit_rl(&blkg->rl); | 57 | blk_exit_rl(&blkg->rl); |
58 | kfree(blkg); | 58 | kfree(blkg); |
59 | } | 59 | } |
60 | 60 | ||
61 | /** | 61 | /** |
62 | * blkg_alloc - allocate a blkg | 62 | * blkg_alloc - allocate a blkg |
63 | * @blkcg: block cgroup the new blkg is associated with | 63 | * @blkcg: block cgroup the new blkg is associated with |
64 | * @q: request_queue the new blkg is associated with | 64 | * @q: request_queue the new blkg is associated with |
65 | * @gfp_mask: allocation mask to use | 65 | * @gfp_mask: allocation mask to use |
66 | * | 66 | * |
67 | * Allocate a new blkg assocating @blkcg and @q. | 67 | * Allocate a new blkg assocating @blkcg and @q. |
68 | */ | 68 | */ |
69 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, | 69 | static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, |
70 | gfp_t gfp_mask) | 70 | gfp_t gfp_mask) |
71 | { | 71 | { |
72 | struct blkcg_gq *blkg; | 72 | struct blkcg_gq *blkg; |
73 | int i; | 73 | int i; |
74 | 74 | ||
75 | /* alloc and init base part */ | 75 | /* alloc and init base part */ |
76 | blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); | 76 | blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); |
77 | if (!blkg) | 77 | if (!blkg) |
78 | return NULL; | 78 | return NULL; |
79 | 79 | ||
80 | blkg->q = q; | 80 | blkg->q = q; |
81 | INIT_LIST_HEAD(&blkg->q_node); | 81 | INIT_LIST_HEAD(&blkg->q_node); |
82 | blkg->blkcg = blkcg; | 82 | blkg->blkcg = blkcg; |
83 | blkg->refcnt = 1; | 83 | blkg->refcnt = 1; |
84 | 84 | ||
85 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ | 85 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ |
86 | if (blkcg != &blkcg_root) { | 86 | if (blkcg != &blkcg_root) { |
87 | if (blk_init_rl(&blkg->rl, q, gfp_mask)) | 87 | if (blk_init_rl(&blkg->rl, q, gfp_mask)) |
88 | goto err_free; | 88 | goto err_free; |
89 | blkg->rl.blkg = blkg; | 89 | blkg->rl.blkg = blkg; |
90 | } | 90 | } |
91 | 91 | ||
92 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 92 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
93 | struct blkcg_policy *pol = blkcg_policy[i]; | 93 | struct blkcg_policy *pol = blkcg_policy[i]; |
94 | struct blkg_policy_data *pd; | 94 | struct blkg_policy_data *pd; |
95 | 95 | ||
96 | if (!blkcg_policy_enabled(q, pol)) | 96 | if (!blkcg_policy_enabled(q, pol)) |
97 | continue; | 97 | continue; |
98 | 98 | ||
99 | /* alloc per-policy data and attach it to blkg */ | 99 | /* alloc per-policy data and attach it to blkg */ |
100 | pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); | 100 | pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); |
101 | if (!pd) | 101 | if (!pd) |
102 | goto err_free; | 102 | goto err_free; |
103 | 103 | ||
104 | blkg->pd[i] = pd; | 104 | blkg->pd[i] = pd; |
105 | pd->blkg = blkg; | 105 | pd->blkg = blkg; |
106 | pd->plid = i; | 106 | pd->plid = i; |
107 | } | 107 | } |
108 | 108 | ||
109 | return blkg; | 109 | return blkg; |
110 | 110 | ||
111 | err_free: | 111 | err_free: |
112 | blkg_free(blkg); | 112 | blkg_free(blkg); |
113 | return NULL; | 113 | return NULL; |
114 | } | 114 | } |
115 | 115 | ||
116 | /** | 116 | /** |
117 | * __blkg_lookup - internal version of blkg_lookup() | 117 | * __blkg_lookup - internal version of blkg_lookup() |
118 | * @blkcg: blkcg of interest | 118 | * @blkcg: blkcg of interest |
119 | * @q: request_queue of interest | 119 | * @q: request_queue of interest |
120 | * @update_hint: whether to update lookup hint with the result or not | 120 | * @update_hint: whether to update lookup hint with the result or not |
121 | * | 121 | * |
122 | * This is internal version and shouldn't be used by policy | 122 | * This is internal version and shouldn't be used by policy |
123 | * implementations. Looks up blkgs for the @blkcg - @q pair regardless of | 123 | * implementations. Looks up blkgs for the @blkcg - @q pair regardless of |
124 | * @q's bypass state. If @update_hint is %true, the caller should be | 124 | * @q's bypass state. If @update_hint is %true, the caller should be |
125 | * holding @q->queue_lock and lookup hint is updated on success. | 125 | * holding @q->queue_lock and lookup hint is updated on success. |
126 | */ | 126 | */ |
127 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, | 127 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, |
128 | bool update_hint) | 128 | bool update_hint) |
129 | { | 129 | { |
130 | struct blkcg_gq *blkg; | 130 | struct blkcg_gq *blkg; |
131 | 131 | ||
132 | blkg = rcu_dereference(blkcg->blkg_hint); | 132 | blkg = rcu_dereference(blkcg->blkg_hint); |
133 | if (blkg && blkg->q == q) | 133 | if (blkg && blkg->q == q) |
134 | return blkg; | 134 | return blkg; |
135 | 135 | ||
136 | /* | 136 | /* |
137 | * Hint didn't match. Look up from the radix tree. Note that the | 137 | * Hint didn't match. Look up from the radix tree. Note that the |
138 | * hint can only be updated under queue_lock as otherwise @blkg | 138 | * hint can only be updated under queue_lock as otherwise @blkg |
139 | * could have already been removed from blkg_tree. The caller is | 139 | * could have already been removed from blkg_tree. The caller is |
140 | * responsible for grabbing queue_lock if @update_hint. | 140 | * responsible for grabbing queue_lock if @update_hint. |
141 | */ | 141 | */ |
142 | blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); | 142 | blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); |
143 | if (blkg && blkg->q == q) { | 143 | if (blkg && blkg->q == q) { |
144 | if (update_hint) { | 144 | if (update_hint) { |
145 | lockdep_assert_held(q->queue_lock); | 145 | lockdep_assert_held(q->queue_lock); |
146 | rcu_assign_pointer(blkcg->blkg_hint, blkg); | 146 | rcu_assign_pointer(blkcg->blkg_hint, blkg); |
147 | } | 147 | } |
148 | return blkg; | 148 | return blkg; |
149 | } | 149 | } |
150 | 150 | ||
151 | return NULL; | 151 | return NULL; |
152 | } | 152 | } |
153 | 153 | ||
154 | /** | 154 | /** |
155 | * blkg_lookup - lookup blkg for the specified blkcg - q pair | 155 | * blkg_lookup - lookup blkg for the specified blkcg - q pair |
156 | * @blkcg: blkcg of interest | 156 | * @blkcg: blkcg of interest |
157 | * @q: request_queue of interest | 157 | * @q: request_queue of interest |
158 | * | 158 | * |
159 | * Lookup blkg for the @blkcg - @q pair. This function should be called | 159 | * Lookup blkg for the @blkcg - @q pair. This function should be called |
160 | * under RCU read lock and is guaranteed to return %NULL if @q is bypassing | 160 | * under RCU read lock and is guaranteed to return %NULL if @q is bypassing |
161 | * - see blk_queue_bypass_start() for details. | 161 | * - see blk_queue_bypass_start() for details. |
162 | */ | 162 | */ |
163 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) | 163 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) |
164 | { | 164 | { |
165 | WARN_ON_ONCE(!rcu_read_lock_held()); | 165 | WARN_ON_ONCE(!rcu_read_lock_held()); |
166 | 166 | ||
167 | if (unlikely(blk_queue_bypass(q))) | 167 | if (unlikely(blk_queue_bypass(q))) |
168 | return NULL; | 168 | return NULL; |
169 | return __blkg_lookup(blkcg, q, false); | 169 | return __blkg_lookup(blkcg, q, false); |
170 | } | 170 | } |
171 | EXPORT_SYMBOL_GPL(blkg_lookup); | 171 | EXPORT_SYMBOL_GPL(blkg_lookup); |
172 | 172 | ||
173 | /* | 173 | /* |
174 | * If @new_blkg is %NULL, this function tries to allocate a new one as | 174 | * If @new_blkg is %NULL, this function tries to allocate a new one as |
175 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. | 175 | * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. |
176 | */ | 176 | */ |
177 | static struct blkcg_gq *blkg_create(struct blkcg *blkcg, | 177 | static struct blkcg_gq *blkg_create(struct blkcg *blkcg, |
178 | struct request_queue *q, | 178 | struct request_queue *q, |
179 | struct blkcg_gq *new_blkg) | 179 | struct blkcg_gq *new_blkg) |
180 | { | 180 | { |
181 | struct blkcg_gq *blkg; | 181 | struct blkcg_gq *blkg; |
182 | int i, ret; | 182 | int i, ret; |
183 | 183 | ||
184 | WARN_ON_ONCE(!rcu_read_lock_held()); | 184 | WARN_ON_ONCE(!rcu_read_lock_held()); |
185 | lockdep_assert_held(q->queue_lock); | 185 | lockdep_assert_held(q->queue_lock); |
186 | 186 | ||
187 | /* blkg holds a reference to blkcg */ | 187 | /* blkg holds a reference to blkcg */ |
188 | if (!css_tryget(&blkcg->css)) { | 188 | if (!css_tryget(&blkcg->css)) { |
189 | ret = -EINVAL; | 189 | ret = -EINVAL; |
190 | goto err_free_blkg; | 190 | goto err_free_blkg; |
191 | } | 191 | } |
192 | 192 | ||
193 | /* allocate */ | 193 | /* allocate */ |
194 | if (!new_blkg) { | 194 | if (!new_blkg) { |
195 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); | 195 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
196 | if (unlikely(!new_blkg)) { | 196 | if (unlikely(!new_blkg)) { |
197 | ret = -ENOMEM; | 197 | ret = -ENOMEM; |
198 | goto err_put_css; | 198 | goto err_put_css; |
199 | } | 199 | } |
200 | } | 200 | } |
201 | blkg = new_blkg; | 201 | blkg = new_blkg; |
202 | 202 | ||
203 | /* link parent */ | 203 | /* link parent */ |
204 | if (blkcg_parent(blkcg)) { | 204 | if (blkcg_parent(blkcg)) { |
205 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); | 205 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); |
206 | if (WARN_ON_ONCE(!blkg->parent)) { | 206 | if (WARN_ON_ONCE(!blkg->parent)) { |
207 | ret = -EINVAL; | 207 | ret = -EINVAL; |
208 | goto err_put_css; | 208 | goto err_put_css; |
209 | } | 209 | } |
210 | blkg_get(blkg->parent); | 210 | blkg_get(blkg->parent); |
211 | } | 211 | } |
212 | 212 | ||
213 | /* invoke per-policy init */ | 213 | /* invoke per-policy init */ |
214 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 214 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
215 | struct blkcg_policy *pol = blkcg_policy[i]; | 215 | struct blkcg_policy *pol = blkcg_policy[i]; |
216 | 216 | ||
217 | if (blkg->pd[i] && pol->pd_init_fn) | 217 | if (blkg->pd[i] && pol->pd_init_fn) |
218 | pol->pd_init_fn(blkg); | 218 | pol->pd_init_fn(blkg); |
219 | } | 219 | } |
220 | 220 | ||
221 | /* insert */ | 221 | /* insert */ |
222 | spin_lock(&blkcg->lock); | 222 | spin_lock(&blkcg->lock); |
223 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); | 223 | ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); |
224 | if (likely(!ret)) { | 224 | if (likely(!ret)) { |
225 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); | 225 | hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); |
226 | list_add(&blkg->q_node, &q->blkg_list); | 226 | list_add(&blkg->q_node, &q->blkg_list); |
227 | 227 | ||
228 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 228 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
229 | struct blkcg_policy *pol = blkcg_policy[i]; | 229 | struct blkcg_policy *pol = blkcg_policy[i]; |
230 | 230 | ||
231 | if (blkg->pd[i] && pol->pd_online_fn) | 231 | if (blkg->pd[i] && pol->pd_online_fn) |
232 | pol->pd_online_fn(blkg); | 232 | pol->pd_online_fn(blkg); |
233 | } | 233 | } |
234 | } | 234 | } |
235 | blkg->online = true; | 235 | blkg->online = true; |
236 | spin_unlock(&blkcg->lock); | 236 | spin_unlock(&blkcg->lock); |
237 | 237 | ||
238 | if (!ret) | 238 | if (!ret) |
239 | return blkg; | 239 | return blkg; |
240 | 240 | ||
241 | /* @blkg failed fully initialized, use the usual release path */ | 241 | /* @blkg failed fully initialized, use the usual release path */ |
242 | blkg_put(blkg); | 242 | blkg_put(blkg); |
243 | return ERR_PTR(ret); | 243 | return ERR_PTR(ret); |
244 | 244 | ||
245 | err_put_css: | 245 | err_put_css: |
246 | css_put(&blkcg->css); | 246 | css_put(&blkcg->css); |
247 | err_free_blkg: | 247 | err_free_blkg: |
248 | blkg_free(new_blkg); | 248 | blkg_free(new_blkg); |
249 | return ERR_PTR(ret); | 249 | return ERR_PTR(ret); |
250 | } | 250 | } |
251 | 251 | ||
252 | /** | 252 | /** |
253 | * blkg_lookup_create - lookup blkg, try to create one if not there | 253 | * blkg_lookup_create - lookup blkg, try to create one if not there |
254 | * @blkcg: blkcg of interest | 254 | * @blkcg: blkcg of interest |
255 | * @q: request_queue of interest | 255 | * @q: request_queue of interest |
256 | * | 256 | * |
257 | * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to | 257 | * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to |
258 | * create one. blkg creation is performed recursively from blkcg_root such | 258 | * create one. blkg creation is performed recursively from blkcg_root such |
259 | * that all non-root blkg's have access to the parent blkg. This function | 259 | * that all non-root blkg's have access to the parent blkg. This function |
260 | * should be called under RCU read lock and @q->queue_lock. | 260 | * should be called under RCU read lock and @q->queue_lock. |
261 | * | 261 | * |
262 | * Returns pointer to the looked up or created blkg on success, ERR_PTR() | 262 | * Returns pointer to the looked up or created blkg on success, ERR_PTR() |
263 | * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not | 263 | * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not |
264 | * dead and bypassing, returns ERR_PTR(-EBUSY). | 264 | * dead and bypassing, returns ERR_PTR(-EBUSY). |
265 | */ | 265 | */ |
266 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 266 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
267 | struct request_queue *q) | 267 | struct request_queue *q) |
268 | { | 268 | { |
269 | struct blkcg_gq *blkg; | 269 | struct blkcg_gq *blkg; |
270 | 270 | ||
271 | WARN_ON_ONCE(!rcu_read_lock_held()); | 271 | WARN_ON_ONCE(!rcu_read_lock_held()); |
272 | lockdep_assert_held(q->queue_lock); | 272 | lockdep_assert_held(q->queue_lock); |
273 | 273 | ||
274 | /* | 274 | /* |
275 | * This could be the first entry point of blkcg implementation and | 275 | * This could be the first entry point of blkcg implementation and |
276 | * we shouldn't allow anything to go through for a bypassing queue. | 276 | * we shouldn't allow anything to go through for a bypassing queue. |
277 | */ | 277 | */ |
278 | if (unlikely(blk_queue_bypass(q))) | 278 | if (unlikely(blk_queue_bypass(q))) |
279 | return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); | 279 | return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); |
280 | 280 | ||
281 | blkg = __blkg_lookup(blkcg, q, true); | 281 | blkg = __blkg_lookup(blkcg, q, true); |
282 | if (blkg) | 282 | if (blkg) |
283 | return blkg; | 283 | return blkg; |
284 | 284 | ||
285 | /* | 285 | /* |
286 | * Create blkgs walking down from blkcg_root to @blkcg, so that all | 286 | * Create blkgs walking down from blkcg_root to @blkcg, so that all |
287 | * non-root blkgs have access to their parents. | 287 | * non-root blkgs have access to their parents. |
288 | */ | 288 | */ |
289 | while (true) { | 289 | while (true) { |
290 | struct blkcg *pos = blkcg; | 290 | struct blkcg *pos = blkcg; |
291 | struct blkcg *parent = blkcg_parent(blkcg); | 291 | struct blkcg *parent = blkcg_parent(blkcg); |
292 | 292 | ||
293 | while (parent && !__blkg_lookup(parent, q, false)) { | 293 | while (parent && !__blkg_lookup(parent, q, false)) { |
294 | pos = parent; | 294 | pos = parent; |
295 | parent = blkcg_parent(parent); | 295 | parent = blkcg_parent(parent); |
296 | } | 296 | } |
297 | 297 | ||
298 | blkg = blkg_create(pos, q, NULL); | 298 | blkg = blkg_create(pos, q, NULL); |
299 | if (pos == blkcg || IS_ERR(blkg)) | 299 | if (pos == blkcg || IS_ERR(blkg)) |
300 | return blkg; | 300 | return blkg; |
301 | } | 301 | } |
302 | } | 302 | } |
303 | EXPORT_SYMBOL_GPL(blkg_lookup_create); | 303 | EXPORT_SYMBOL_GPL(blkg_lookup_create); |
304 | 304 | ||
305 | static void blkg_destroy(struct blkcg_gq *blkg) | 305 | static void blkg_destroy(struct blkcg_gq *blkg) |
306 | { | 306 | { |
307 | struct blkcg *blkcg = blkg->blkcg; | 307 | struct blkcg *blkcg = blkg->blkcg; |
308 | int i; | 308 | int i; |
309 | 309 | ||
310 | lockdep_assert_held(blkg->q->queue_lock); | 310 | lockdep_assert_held(blkg->q->queue_lock); |
311 | lockdep_assert_held(&blkcg->lock); | 311 | lockdep_assert_held(&blkcg->lock); |
312 | 312 | ||
313 | /* Something wrong if we are trying to remove same group twice */ | 313 | /* Something wrong if we are trying to remove same group twice */ |
314 | WARN_ON_ONCE(list_empty(&blkg->q_node)); | 314 | WARN_ON_ONCE(list_empty(&blkg->q_node)); |
315 | WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); | 315 | WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); |
316 | 316 | ||
317 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 317 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
318 | struct blkcg_policy *pol = blkcg_policy[i]; | 318 | struct blkcg_policy *pol = blkcg_policy[i]; |
319 | 319 | ||
320 | if (blkg->pd[i] && pol->pd_offline_fn) | 320 | if (blkg->pd[i] && pol->pd_offline_fn) |
321 | pol->pd_offline_fn(blkg); | 321 | pol->pd_offline_fn(blkg); |
322 | } | 322 | } |
323 | blkg->online = false; | 323 | blkg->online = false; |
324 | 324 | ||
325 | radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); | 325 | radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); |
326 | list_del_init(&blkg->q_node); | 326 | list_del_init(&blkg->q_node); |
327 | hlist_del_init_rcu(&blkg->blkcg_node); | 327 | hlist_del_init_rcu(&blkg->blkcg_node); |
328 | 328 | ||
329 | /* | 329 | /* |
330 | * Both setting lookup hint to and clearing it from @blkg are done | 330 | * Both setting lookup hint to and clearing it from @blkg are done |
331 | * under queue_lock. If it's not pointing to @blkg now, it never | 331 | * under queue_lock. If it's not pointing to @blkg now, it never |
332 | * will. Hint assignment itself can race safely. | 332 | * will. Hint assignment itself can race safely. |
333 | */ | 333 | */ |
334 | if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) | 334 | if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) |
335 | rcu_assign_pointer(blkcg->blkg_hint, NULL); | 335 | rcu_assign_pointer(blkcg->blkg_hint, NULL); |
336 | 336 | ||
337 | /* | 337 | /* |
338 | * Put the reference taken at the time of creation so that when all | 338 | * Put the reference taken at the time of creation so that when all |
339 | * queues are gone, group can be destroyed. | 339 | * queues are gone, group can be destroyed. |
340 | */ | 340 | */ |
341 | blkg_put(blkg); | 341 | blkg_put(blkg); |
342 | } | 342 | } |
343 | 343 | ||
344 | /** | 344 | /** |
345 | * blkg_destroy_all - destroy all blkgs associated with a request_queue | 345 | * blkg_destroy_all - destroy all blkgs associated with a request_queue |
346 | * @q: request_queue of interest | 346 | * @q: request_queue of interest |
347 | * | 347 | * |
348 | * Destroy all blkgs associated with @q. | 348 | * Destroy all blkgs associated with @q. |
349 | */ | 349 | */ |
350 | static void blkg_destroy_all(struct request_queue *q) | 350 | static void blkg_destroy_all(struct request_queue *q) |
351 | { | 351 | { |
352 | struct blkcg_gq *blkg, *n; | 352 | struct blkcg_gq *blkg, *n; |
353 | 353 | ||
354 | lockdep_assert_held(q->queue_lock); | 354 | lockdep_assert_held(q->queue_lock); |
355 | 355 | ||
356 | list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { | 356 | list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { |
357 | struct blkcg *blkcg = blkg->blkcg; | 357 | struct blkcg *blkcg = blkg->blkcg; |
358 | 358 | ||
359 | spin_lock(&blkcg->lock); | 359 | spin_lock(&blkcg->lock); |
360 | blkg_destroy(blkg); | 360 | blkg_destroy(blkg); |
361 | spin_unlock(&blkcg->lock); | 361 | spin_unlock(&blkcg->lock); |
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * root blkg is destroyed. Just clear the pointer since | 365 | * root blkg is destroyed. Just clear the pointer since |
366 | * root_rl does not take reference on root blkg. | 366 | * root_rl does not take reference on root blkg. |
367 | */ | 367 | */ |
368 | q->root_blkg = NULL; | 368 | q->root_blkg = NULL; |
369 | q->root_rl.blkg = NULL; | 369 | q->root_rl.blkg = NULL; |
370 | } | 370 | } |
371 | 371 | ||
@@ -372,34 +372,32 @@
-static void blkg_rcu_free(struct rcu_head *rcu_head)
-{
-        blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
-
-void __blkg_release(struct blkcg_gq *blkg)
-{
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid. For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
         int i;

         /* tell policies that this one is being freed */
         for (i = 0; i < BLKCG_MAX_POLS; i++) {
                 struct blkcg_policy *pol = blkcg_policy[i];

                 if (blkg->pd[i] && pol->pd_exit_fn)
                         pol->pd_exit_fn(blkg);
         }

         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
-        if (blkg->parent)
+        if (blkg->parent) {
+                spin_lock_irq(blkg->q->queue_lock);
                 blkg_put(blkg->parent);
+                spin_unlock_irq(blkg->q->queue_lock);
+        }

-        /*
-         * A group is freed in rcu manner. But having an rcu lock does not
-         * mean that one can access all the fields of blkg and assume these
-         * are valid. For example, don't try to follow throtl_data and
-         * request queue links.
-         *
-         * Having a reference to blkg under an rcu allows acess to only
-         * values local to groups like group stats and group rate limits
-         */
-        call_rcu(&blkg->rcu_head, blkg_rcu_free);
+        blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
406 | 404 | ||
407 | /* | 405 | /* |
408 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky | 406 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky |
409 | * because the root blkg uses @q->root_rl instead of its own rl. | 407 | * because the root blkg uses @q->root_rl instead of its own rl. |
410 | */ | 408 | */ |
411 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | 409 | struct request_list *__blk_queue_next_rl(struct request_list *rl, |
412 | struct request_queue *q) | 410 | struct request_queue *q) |
413 | { | 411 | { |
414 | struct list_head *ent; | 412 | struct list_head *ent; |
415 | struct blkcg_gq *blkg; | 413 | struct blkcg_gq *blkg; |
416 | 414 | ||
417 | /* | 415 | /* |
418 | * Determine the current blkg list_head. The first entry is | 416 | * Determine the current blkg list_head. The first entry is |
419 | * root_rl which is off @q->blkg_list and mapped to the head. | 417 | * root_rl which is off @q->blkg_list and mapped to the head. |
420 | */ | 418 | */ |
421 | if (rl == &q->root_rl) { | 419 | if (rl == &q->root_rl) { |
422 | ent = &q->blkg_list; | 420 | ent = &q->blkg_list; |
423 | /* There are no more block groups, hence no request lists */ | 421 | /* There are no more block groups, hence no request lists */ |
424 | if (list_empty(ent)) | 422 | if (list_empty(ent)) |
425 | return NULL; | 423 | return NULL; |
426 | } else { | 424 | } else { |
427 | blkg = container_of(rl, struct blkcg_gq, rl); | 425 | blkg = container_of(rl, struct blkcg_gq, rl); |
428 | ent = &blkg->q_node; | 426 | ent = &blkg->q_node; |
429 | } | 427 | } |
430 | 428 | ||
431 | /* walk to the next list_head, skip root blkcg */ | 429 | /* walk to the next list_head, skip root blkcg */ |
432 | ent = ent->next; | 430 | ent = ent->next; |
433 | if (ent == &q->root_blkg->q_node) | 431 | if (ent == &q->root_blkg->q_node) |
434 | ent = ent->next; | 432 | ent = ent->next; |
435 | if (ent == &q->blkg_list) | 433 | if (ent == &q->blkg_list) |
436 | return NULL; | 434 | return NULL; |
437 | 435 | ||
438 | blkg = container_of(ent, struct blkcg_gq, q_node); | 436 | blkg = container_of(ent, struct blkcg_gq, q_node); |
439 | return &blkg->rl; | 437 | return &blkg->rl; |
440 | } | 438 | } |
441 | 439 | ||
442 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, | 440 | static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, |
443 | u64 val) | 441 | u64 val) |
444 | { | 442 | { |
445 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); | 443 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); |
446 | struct blkcg_gq *blkg; | 444 | struct blkcg_gq *blkg; |
447 | int i; | 445 | int i; |
448 | 446 | ||
449 | mutex_lock(&blkcg_pol_mutex); | 447 | mutex_lock(&blkcg_pol_mutex); |
450 | spin_lock_irq(&blkcg->lock); | 448 | spin_lock_irq(&blkcg->lock); |
451 | 449 | ||
452 | /* | 450 | /* |
453 | * Note that stat reset is racy - it doesn't synchronize against | 451 | * Note that stat reset is racy - it doesn't synchronize against |
454 | * stat updates. This is a debug feature which shouldn't exist | 452 | * stat updates. This is a debug feature which shouldn't exist |
455 | * anyway. If you get hit by a race, retry. | 453 | * anyway. If you get hit by a race, retry. |
456 | */ | 454 | */ |
457 | hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { | 455 | hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { |
458 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 456 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
459 | struct blkcg_policy *pol = blkcg_policy[i]; | 457 | struct blkcg_policy *pol = blkcg_policy[i]; |
460 | 458 | ||
461 | if (blkcg_policy_enabled(blkg->q, pol) && | 459 | if (blkcg_policy_enabled(blkg->q, pol) && |
462 | pol->pd_reset_stats_fn) | 460 | pol->pd_reset_stats_fn) |
463 | pol->pd_reset_stats_fn(blkg); | 461 | pol->pd_reset_stats_fn(blkg); |
464 | } | 462 | } |
465 | } | 463 | } |
466 | 464 | ||
467 | spin_unlock_irq(&blkcg->lock); | 465 | spin_unlock_irq(&blkcg->lock); |
468 | mutex_unlock(&blkcg_pol_mutex); | 466 | mutex_unlock(&blkcg_pol_mutex); |
469 | return 0; | 467 | return 0; |
470 | } | 468 | } |
471 | 469 | ||
472 | static const char *blkg_dev_name(struct blkcg_gq *blkg) | 470 | static const char *blkg_dev_name(struct blkcg_gq *blkg) |
473 | { | 471 | { |
474 | /* some drivers (floppy) instantiate a queue w/o disk registered */ | 472 | /* some drivers (floppy) instantiate a queue w/o disk registered */ |
475 | if (blkg->q->backing_dev_info.dev) | 473 | if (blkg->q->backing_dev_info.dev) |
476 | return dev_name(blkg->q->backing_dev_info.dev); | 474 | return dev_name(blkg->q->backing_dev_info.dev); |
477 | return NULL; | 475 | return NULL; |
478 | } | 476 | } |
479 | 477 | ||
480 | /** | 478 | /** |
481 | * blkcg_print_blkgs - helper for printing per-blkg data | 479 | * blkcg_print_blkgs - helper for printing per-blkg data |
482 | * @sf: seq_file to print to | 480 | * @sf: seq_file to print to |
483 | * @blkcg: blkcg of interest | 481 | * @blkcg: blkcg of interest |
484 | * @prfill: fill function to print out a blkg | 482 | * @prfill: fill function to print out a blkg |
485 | * @pol: policy in question | 483 | * @pol: policy in question |
486 | * @data: data to be passed to @prfill | 484 | * @data: data to be passed to @prfill |
487 | * @show_total: to print out sum of prfill return values or not | 485 | * @show_total: to print out sum of prfill return values or not |
488 | * | 486 | * |
489 | * This function invokes @prfill on each blkg of @blkcg if pd for the | 487 | * This function invokes @prfill on each blkg of @blkcg if pd for the |
490 | * policy specified by @pol exists. @prfill is invoked with @sf, the | 488 | * policy specified by @pol exists. @prfill is invoked with @sf, the |
491 | * policy data and @data and the matching queue lock held. If @show_total | 489 | * policy data and @data and the matching queue lock held. If @show_total |
492 | * is %true, the sum of the return values from @prfill is printed with | 490 | * is %true, the sum of the return values from @prfill is printed with |
493 | * "Total" label at the end. | 491 | * "Total" label at the end. |
494 | * | 492 | * |
495 | * This is to be used to construct print functions for | 493 | * This is to be used to construct print functions for |
496 | * cftype->read_seq_string method. | 494 | * cftype->read_seq_string method. |
497 | */ | 495 | */ |
498 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | 496 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, |
499 | u64 (*prfill)(struct seq_file *, | 497 | u64 (*prfill)(struct seq_file *, |
500 | struct blkg_policy_data *, int), | 498 | struct blkg_policy_data *, int), |
501 | const struct blkcg_policy *pol, int data, | 499 | const struct blkcg_policy *pol, int data, |
502 | bool show_total) | 500 | bool show_total) |
503 | { | 501 | { |
504 | struct blkcg_gq *blkg; | 502 | struct blkcg_gq *blkg; |
505 | u64 total = 0; | 503 | u64 total = 0; |
506 | 504 | ||
507 | rcu_read_lock(); | 505 | rcu_read_lock(); |
508 | hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { | 506 | hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { |
509 | spin_lock_irq(blkg->q->queue_lock); | 507 | spin_lock_irq(blkg->q->queue_lock); |
510 | if (blkcg_policy_enabled(blkg->q, pol)) | 508 | if (blkcg_policy_enabled(blkg->q, pol)) |
511 | total += prfill(sf, blkg->pd[pol->plid], data); | 509 | total += prfill(sf, blkg->pd[pol->plid], data); |
512 | spin_unlock_irq(blkg->q->queue_lock); | 510 | spin_unlock_irq(blkg->q->queue_lock); |
513 | } | 511 | } |
514 | rcu_read_unlock(); | 512 | rcu_read_unlock(); |
515 | 513 | ||
516 | if (show_total) | 514 | if (show_total) |
517 | seq_printf(sf, "Total %llu\n", (unsigned long long)total); | 515 | seq_printf(sf, "Total %llu\n", (unsigned long long)total); |
518 | } | 516 | } |
519 | EXPORT_SYMBOL_GPL(blkcg_print_blkgs); | 517 | EXPORT_SYMBOL_GPL(blkcg_print_blkgs); |
520 | 518 | ||
521 | /** | 519 | /** |
522 | * __blkg_prfill_u64 - prfill helper for a single u64 value | 520 | * __blkg_prfill_u64 - prfill helper for a single u64 value |
523 | * @sf: seq_file to print to | 521 | * @sf: seq_file to print to |
524 | * @pd: policy private data of interest | 522 | * @pd: policy private data of interest |
525 | * @v: value to print | 523 | * @v: value to print |
526 | * | 524 | * |
527 | * Print @v to @sf for the device assocaited with @pd. | 525 | * Print @v to @sf for the device assocaited with @pd. |
528 | */ | 526 | */ |
529 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) | 527 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v) |
530 | { | 528 | { |
531 | const char *dname = blkg_dev_name(pd->blkg); | 529 | const char *dname = blkg_dev_name(pd->blkg); |
532 | 530 | ||
533 | if (!dname) | 531 | if (!dname) |
534 | return 0; | 532 | return 0; |
535 | 533 | ||
536 | seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); | 534 | seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v); |
537 | return v; | 535 | return v; |
538 | } | 536 | } |
539 | EXPORT_SYMBOL_GPL(__blkg_prfill_u64); | 537 | EXPORT_SYMBOL_GPL(__blkg_prfill_u64); |
540 | 538 | ||
541 | /** | 539 | /** |
542 | * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat | 540 | * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat |
543 | * @sf: seq_file to print to | 541 | * @sf: seq_file to print to |
544 | * @pd: policy private data of interest | 542 | * @pd: policy private data of interest |
545 | * @rwstat: rwstat to print | 543 | * @rwstat: rwstat to print |
546 | * | 544 | * |
547 | * Print @rwstat to @sf for the device assocaited with @pd. | 545 | * Print @rwstat to @sf for the device assocaited with @pd. |
548 | */ | 546 | */ |
549 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 547 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
550 | const struct blkg_rwstat *rwstat) | 548 | const struct blkg_rwstat *rwstat) |
551 | { | 549 | { |
552 | static const char *rwstr[] = { | 550 | static const char *rwstr[] = { |
553 | [BLKG_RWSTAT_READ] = "Read", | 551 | [BLKG_RWSTAT_READ] = "Read", |
554 | [BLKG_RWSTAT_WRITE] = "Write", | 552 | [BLKG_RWSTAT_WRITE] = "Write", |
555 | [BLKG_RWSTAT_SYNC] = "Sync", | 553 | [BLKG_RWSTAT_SYNC] = "Sync", |
556 | [BLKG_RWSTAT_ASYNC] = "Async", | 554 | [BLKG_RWSTAT_ASYNC] = "Async", |
557 | }; | 555 | }; |
558 | const char *dname = blkg_dev_name(pd->blkg); | 556 | const char *dname = blkg_dev_name(pd->blkg); |
559 | u64 v; | 557 | u64 v; |
560 | int i; | 558 | int i; |
561 | 559 | ||
562 | if (!dname) | 560 | if (!dname) |
563 | return 0; | 561 | return 0; |
564 | 562 | ||
565 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 563 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
566 | seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], | 564 | seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], |
567 | (unsigned long long)rwstat->cnt[i]); | 565 | (unsigned long long)rwstat->cnt[i]); |
568 | 566 | ||
569 | v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE]; | 567 | v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE]; |
570 | seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); | 568 | seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); |
571 | return v; | 569 | return v; |
572 | } | 570 | } |
573 | EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); | 571 | EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); |
574 | 572 | ||
575 | /** | 573 | /** |
576 | * blkg_prfill_stat - prfill callback for blkg_stat | 574 | * blkg_prfill_stat - prfill callback for blkg_stat |
577 | * @sf: seq_file to print to | 575 | * @sf: seq_file to print to |
578 | * @pd: policy private data of interest | 576 | * @pd: policy private data of interest |
579 | * @off: offset to the blkg_stat in @pd | 577 | * @off: offset to the blkg_stat in @pd |
580 | * | 578 | * |
581 | * prfill callback for printing a blkg_stat. | 579 | * prfill callback for printing a blkg_stat. |
582 | */ | 580 | */ |
583 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) | 581 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) |
584 | { | 582 | { |
585 | return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); | 583 | return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); |
586 | } | 584 | } |
587 | EXPORT_SYMBOL_GPL(blkg_prfill_stat); | 585 | EXPORT_SYMBOL_GPL(blkg_prfill_stat); |
588 | 586 | ||
589 | /** | 587 | /** |
590 | * blkg_prfill_rwstat - prfill callback for blkg_rwstat | 588 | * blkg_prfill_rwstat - prfill callback for blkg_rwstat |
591 | * @sf: seq_file to print to | 589 | * @sf: seq_file to print to |
592 | * @pd: policy private data of interest | 590 | * @pd: policy private data of interest |
593 | * @off: offset to the blkg_rwstat in @pd | 591 | * @off: offset to the blkg_rwstat in @pd |
594 | * | 592 | * |
595 | * prfill callback for printing a blkg_rwstat. | 593 | * prfill callback for printing a blkg_rwstat. |
596 | */ | 594 | */ |
597 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 595 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
598 | int off) | 596 | int off) |
599 | { | 597 | { |
600 | struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); | 598 | struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); |
601 | 599 | ||
602 | return __blkg_prfill_rwstat(sf, pd, &rwstat); | 600 | return __blkg_prfill_rwstat(sf, pd, &rwstat); |
603 | } | 601 | } |
604 | EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); | 602 | EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); |
605 | 603 | ||
606 | /** | 604 | /** |
607 | * blkg_stat_recursive_sum - collect hierarchical blkg_stat | 605 | * blkg_stat_recursive_sum - collect hierarchical blkg_stat |
608 | * @pd: policy private data of interest | 606 | * @pd: policy private data of interest |
609 | * @off: offset to the blkg_stat in @pd | 607 | * @off: offset to the blkg_stat in @pd |
610 | * | 608 | * |
611 | * Collect the blkg_stat specified by @off from @pd and all its online | 609 | * Collect the blkg_stat specified by @off from @pd and all its online |
612 | * descendants and return the sum. The caller must be holding the queue | 610 | * descendants and return the sum. The caller must be holding the queue |
613 | * lock for online tests. | 611 | * lock for online tests. |
614 | */ | 612 | */ |
615 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) | 613 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) |
616 | { | 614 | { |
617 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; | 615 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; |
618 | struct blkcg_gq *pos_blkg; | 616 | struct blkcg_gq *pos_blkg; |
619 | struct cgroup *pos_cgrp; | 617 | struct cgroup *pos_cgrp; |
620 | u64 sum; | 618 | u64 sum; |
621 | 619 | ||
622 | lockdep_assert_held(pd->blkg->q->queue_lock); | 620 | lockdep_assert_held(pd->blkg->q->queue_lock); |
623 | 621 | ||
624 | sum = blkg_stat_read((void *)pd + off); | 622 | sum = blkg_stat_read((void *)pd + off); |
625 | 623 | ||
626 | rcu_read_lock(); | 624 | rcu_read_lock(); |
627 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { | 625 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { |
628 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); | 626 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); |
629 | struct blkg_stat *stat = (void *)pos_pd + off; | 627 | struct blkg_stat *stat = (void *)pos_pd + off; |
630 | 628 | ||
631 | if (pos_blkg->online) | 629 | if (pos_blkg->online) |
632 | sum += blkg_stat_read(stat); | 630 | sum += blkg_stat_read(stat); |
633 | } | 631 | } |
634 | rcu_read_unlock(); | 632 | rcu_read_unlock(); |
635 | 633 | ||
636 | return sum; | 634 | return sum; |
637 | } | 635 | } |
638 | EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); | 636 | EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); |
639 | 637 | ||
640 | /** | 638 | /** |
641 | * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat | 639 | * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat |
642 | * @pd: policy private data of interest | 640 | * @pd: policy private data of interest |
643 | * @off: offset to the blkg_stat in @pd | 641 | * @off: offset to the blkg_stat in @pd |
644 | * | 642 | * |
645 | * Collect the blkg_rwstat specified by @off from @pd and all its online | 643 | * Collect the blkg_rwstat specified by @off from @pd and all its online |
646 | * descendants and return the sum. The caller must be holding the queue | 644 | * descendants and return the sum. The caller must be holding the queue |
647 | * lock for online tests. | 645 | * lock for online tests. |
648 | */ | 646 | */ |
649 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | 647 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, |
650 | int off) | 648 | int off) |
651 | { | 649 | { |
652 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; | 650 | struct blkcg_policy *pol = blkcg_policy[pd->plid]; |
653 | struct blkcg_gq *pos_blkg; | 651 | struct blkcg_gq *pos_blkg; |
654 | struct cgroup *pos_cgrp; | 652 | struct cgroup *pos_cgrp; |
655 | struct blkg_rwstat sum; | 653 | struct blkg_rwstat sum; |
656 | int i; | 654 | int i; |
657 | 655 | ||
658 | lockdep_assert_held(pd->blkg->q->queue_lock); | 656 | lockdep_assert_held(pd->blkg->q->queue_lock); |
659 | 657 | ||
660 | sum = blkg_rwstat_read((void *)pd + off); | 658 | sum = blkg_rwstat_read((void *)pd + off); |
661 | 659 | ||
662 | rcu_read_lock(); | 660 | rcu_read_lock(); |
663 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { | 661 | blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { |
664 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); | 662 | struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); |
665 | struct blkg_rwstat *rwstat = (void *)pos_pd + off; | 663 | struct blkg_rwstat *rwstat = (void *)pos_pd + off; |
666 | struct blkg_rwstat tmp; | 664 | struct blkg_rwstat tmp; |
667 | 665 | ||
668 | if (!pos_blkg->online) | 666 | if (!pos_blkg->online) |
669 | continue; | 667 | continue; |
670 | 668 | ||
671 | tmp = blkg_rwstat_read(rwstat); | 669 | tmp = blkg_rwstat_read(rwstat); |
672 | 670 | ||
673 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 671 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
674 | sum.cnt[i] += tmp.cnt[i]; | 672 | sum.cnt[i] += tmp.cnt[i]; |
675 | } | 673 | } |
676 | rcu_read_unlock(); | 674 | rcu_read_unlock(); |
677 | 675 | ||
678 | return sum; | 676 | return sum; |
679 | } | 677 | } |
680 | EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); | 678 | EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); |
681 | 679 | ||
682 | /** | 680 | /** |
683 | * blkg_conf_prep - parse and prepare for per-blkg config update | 681 | * blkg_conf_prep - parse and prepare for per-blkg config update |
684 | * @blkcg: target block cgroup | 682 | * @blkcg: target block cgroup |
685 | * @pol: target policy | 683 | * @pol: target policy |
686 | * @input: input string | 684 | * @input: input string |
687 | * @ctx: blkg_conf_ctx to be filled | 685 | * @ctx: blkg_conf_ctx to be filled |
688 | * | 686 | * |
689 | * Parse per-blkg config update from @input and initialize @ctx with the | 687 | * Parse per-blkg config update from @input and initialize @ctx with the |
690 | * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new | 688 | * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new |
691 | * value. This function returns with RCU read lock and queue lock held and | 689 | * value. This function returns with RCU read lock and queue lock held and |
692 | * must be paired with blkg_conf_finish(). | 690 | * must be paired with blkg_conf_finish(). |
693 | */ | 691 | */ |
694 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | 692 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, |
695 | const char *input, struct blkg_conf_ctx *ctx) | 693 | const char *input, struct blkg_conf_ctx *ctx) |
696 | __acquires(rcu) __acquires(disk->queue->queue_lock) | 694 | __acquires(rcu) __acquires(disk->queue->queue_lock) |
697 | { | 695 | { |
698 | struct gendisk *disk; | 696 | struct gendisk *disk; |
699 | struct blkcg_gq *blkg; | 697 | struct blkcg_gq *blkg; |
700 | unsigned int major, minor; | 698 | unsigned int major, minor; |
701 | unsigned long long v; | 699 | unsigned long long v; |
702 | int part, ret; | 700 | int part, ret; |
703 | 701 | ||
704 | if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3) | 702 | if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3) |
705 | return -EINVAL; | 703 | return -EINVAL; |
706 | 704 | ||
707 | disk = get_gendisk(MKDEV(major, minor), &part); | 705 | disk = get_gendisk(MKDEV(major, minor), &part); |
708 | if (!disk || part) | 706 | if (!disk || part) |
709 | return -EINVAL; | 707 | return -EINVAL; |
710 | 708 | ||
711 | rcu_read_lock(); | 709 | rcu_read_lock(); |
712 | spin_lock_irq(disk->queue->queue_lock); | 710 | spin_lock_irq(disk->queue->queue_lock); |
713 | 711 | ||
714 | if (blkcg_policy_enabled(disk->queue, pol)) | 712 | if (blkcg_policy_enabled(disk->queue, pol)) |
715 | blkg = blkg_lookup_create(blkcg, disk->queue); | 713 | blkg = blkg_lookup_create(blkcg, disk->queue); |
716 | else | 714 | else |
717 | blkg = ERR_PTR(-EINVAL); | 715 | blkg = ERR_PTR(-EINVAL); |
718 | 716 | ||
719 | if (IS_ERR(blkg)) { | 717 | if (IS_ERR(blkg)) { |
720 | ret = PTR_ERR(blkg); | 718 | ret = PTR_ERR(blkg); |
721 | rcu_read_unlock(); | 719 | rcu_read_unlock(); |
722 | spin_unlock_irq(disk->queue->queue_lock); | 720 | spin_unlock_irq(disk->queue->queue_lock); |
723 | put_disk(disk); | 721 | put_disk(disk); |
724 | /* | 722 | /* |
725 | * If queue was bypassing, we should retry. Do so after a | 723 | * If queue was bypassing, we should retry. Do so after a |
726 | * short msleep(). It isn't strictly necessary but queue | 724 | * short msleep(). It isn't strictly necessary but queue |
727 | * can be bypassing for some time and it's always nice to | 725 | * can be bypassing for some time and it's always nice to |
728 | * avoid busy looping. | 726 | * avoid busy looping. |
729 | */ | 727 | */ |
730 | if (ret == -EBUSY) { | 728 | if (ret == -EBUSY) { |
731 | msleep(10); | 729 | msleep(10); |
732 | ret = restart_syscall(); | 730 | ret = restart_syscall(); |
733 | } | 731 | } |
734 | return ret; | 732 | return ret; |
735 | } | 733 | } |
736 | 734 | ||
737 | ctx->disk = disk; | 735 | ctx->disk = disk; |
738 | ctx->blkg = blkg; | 736 | ctx->blkg = blkg; |
739 | ctx->v = v; | 737 | ctx->v = v; |
740 | return 0; | 738 | return 0; |
741 | } | 739 | } |
742 | EXPORT_SYMBOL_GPL(blkg_conf_prep); | 740 | EXPORT_SYMBOL_GPL(blkg_conf_prep); |
743 | 741 | ||
744 | /** | 742 | /** |
745 | * blkg_conf_finish - finish up per-blkg config update | 743 | * blkg_conf_finish - finish up per-blkg config update |
746 | * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep() | 744 | * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep() |
747 | * | 745 | * |
748 | * Finish up after per-blkg config update. This function must be paired | 746 | * Finish up after per-blkg config update. This function must be paired |
749 | * with blkg_conf_prep(). | 747 | * with blkg_conf_prep(). |
750 | */ | 748 | */ |
751 | void blkg_conf_finish(struct blkg_conf_ctx *ctx) | 749 | void blkg_conf_finish(struct blkg_conf_ctx *ctx) |
752 | __releases(ctx->disk->queue->queue_lock) __releases(rcu) | 750 | __releases(ctx->disk->queue->queue_lock) __releases(rcu) |
753 | { | 751 | { |
754 | spin_unlock_irq(ctx->disk->queue->queue_lock); | 752 | spin_unlock_irq(ctx->disk->queue->queue_lock); |
755 | rcu_read_unlock(); | 753 | rcu_read_unlock(); |
756 | put_disk(ctx->disk); | 754 | put_disk(ctx->disk); |
757 | } | 755 | } |
758 | EXPORT_SYMBOL_GPL(blkg_conf_finish); | 756 | EXPORT_SYMBOL_GPL(blkg_conf_finish); |
759 | 757 | ||
760 | struct cftype blkcg_files[] = { | 758 | struct cftype blkcg_files[] = { |
761 | { | 759 | { |
762 | .name = "reset_stats", | 760 | .name = "reset_stats", |
763 | .write_u64 = blkcg_reset_stats, | 761 | .write_u64 = blkcg_reset_stats, |
764 | }, | 762 | }, |
765 | { } /* terminate */ | 763 | { } /* terminate */ |
766 | }; | 764 | }; |
767 | 765 | ||
768 | /** | 766 | /** |
769 | * blkcg_css_offline - cgroup css_offline callback | 767 | * blkcg_css_offline - cgroup css_offline callback |
770 | * @cgroup: cgroup of interest | 768 | * @cgroup: cgroup of interest |
771 | * | 769 | * |
772 | * This function is called when @cgroup is about to go away and responsible | 770 | * This function is called when @cgroup is about to go away and responsible |
773 | * for shooting down all blkgs associated with @cgroup. blkgs should be | 771 | * for shooting down all blkgs associated with @cgroup. blkgs should be |
774 | * removed while holding both q and blkcg locks. As blkcg lock is nested | 772 | * removed while holding both q and blkcg locks. As blkcg lock is nested |
775 | * inside q lock, this function performs reverse double lock dancing. | 773 | * inside q lock, this function performs reverse double lock dancing. |
776 | * | 774 | * |
777 | * This is the blkcg counterpart of ioc_release_fn(). | 775 | * This is the blkcg counterpart of ioc_release_fn(). |
778 | */ | 776 | */ |
779 | static void blkcg_css_offline(struct cgroup *cgroup) | 777 | static void blkcg_css_offline(struct cgroup *cgroup) |
780 | { | 778 | { |
781 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); | 779 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); |
782 | 780 | ||
783 | spin_lock_irq(&blkcg->lock); | 781 | spin_lock_irq(&blkcg->lock); |
784 | 782 | ||
785 | while (!hlist_empty(&blkcg->blkg_list)) { | 783 | while (!hlist_empty(&blkcg->blkg_list)) { |
786 | struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, | 784 | struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, |
787 | struct blkcg_gq, blkcg_node); | 785 | struct blkcg_gq, blkcg_node); |
788 | struct request_queue *q = blkg->q; | 786 | struct request_queue *q = blkg->q; |
789 | 787 | ||
790 | if (spin_trylock(q->queue_lock)) { | 788 | if (spin_trylock(q->queue_lock)) { |
791 | blkg_destroy(blkg); | 789 | blkg_destroy(blkg); |
792 | spin_unlock(q->queue_lock); | 790 | spin_unlock(q->queue_lock); |
793 | } else { | 791 | } else { |
794 | spin_unlock_irq(&blkcg->lock); | 792 | spin_unlock_irq(&blkcg->lock); |
795 | cpu_relax(); | 793 | cpu_relax(); |
796 | spin_lock_irq(&blkcg->lock); | 794 | spin_lock_irq(&blkcg->lock); |
797 | } | 795 | } |
798 | } | 796 | } |
799 | 797 | ||
800 | spin_unlock_irq(&blkcg->lock); | 798 | spin_unlock_irq(&blkcg->lock); |
801 | } | 799 | } |
802 | 800 | ||
803 | static void blkcg_css_free(struct cgroup *cgroup) | 801 | static void blkcg_css_free(struct cgroup *cgroup) |
804 | { | 802 | { |
805 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); | 803 | struct blkcg *blkcg = cgroup_to_blkcg(cgroup); |
806 | 804 | ||
807 | if (blkcg != &blkcg_root) | 805 | if (blkcg != &blkcg_root) |
808 | kfree(blkcg); | 806 | kfree(blkcg); |
809 | } | 807 | } |
810 | 808 | ||
811 | static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) | 809 | static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) |
812 | { | 810 | { |
813 | static atomic64_t id_seq = ATOMIC64_INIT(0); | 811 | static atomic64_t id_seq = ATOMIC64_INIT(0); |
814 | struct blkcg *blkcg; | 812 | struct blkcg *blkcg; |
815 | struct cgroup *parent = cgroup->parent; | 813 | struct cgroup *parent = cgroup->parent; |
816 | 814 | ||
817 | if (!parent) { | 815 | if (!parent) { |
818 | blkcg = &blkcg_root; | 816 | blkcg = &blkcg_root; |
819 | goto done; | 817 | goto done; |
820 | } | 818 | } |
821 | 819 | ||
822 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); | 820 | blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); |
823 | if (!blkcg) | 821 | if (!blkcg) |
824 | return ERR_PTR(-ENOMEM); | 822 | return ERR_PTR(-ENOMEM); |
825 | 823 | ||
826 | blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; | 824 | blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; |
827 | blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; | 825 | blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; |
828 | blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ | 826 | blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ |
829 | done: | 827 | done: |
830 | spin_lock_init(&blkcg->lock); | 828 | spin_lock_init(&blkcg->lock); |
831 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); | 829 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); |
832 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 830 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
833 | 831 | ||
834 | return &blkcg->css; | 832 | return &blkcg->css; |
835 | } | 833 | } |
836 | 834 | ||
837 | /** | 835 | /** |
838 | * blkcg_init_queue - initialize blkcg part of request queue | 836 | * blkcg_init_queue - initialize blkcg part of request queue |
839 | * @q: request_queue to initialize | 837 | * @q: request_queue to initialize |
840 | * | 838 | * |
841 | * Called from blk_alloc_queue_node(). Responsible for initializing blkcg | 839 | * Called from blk_alloc_queue_node(). Responsible for initializing blkcg |
842 | * part of new request_queue @q. | 840 | * part of new request_queue @q. |
843 | * | 841 | * |
844 | * RETURNS: | 842 | * RETURNS: |
845 | * 0 on success, -errno on failure. | 843 | * 0 on success, -errno on failure. |
846 | */ | 844 | */ |
847 | int blkcg_init_queue(struct request_queue *q) | 845 | int blkcg_init_queue(struct request_queue *q) |
848 | { | 846 | { |
849 | might_sleep(); | 847 | might_sleep(); |
850 | 848 | ||
851 | return blk_throtl_init(q); | 849 | return blk_throtl_init(q); |
852 | } | 850 | } |
853 | 851 | ||
854 | /** | 852 | /** |
855 | * blkcg_drain_queue - drain blkcg part of request_queue | 853 | * blkcg_drain_queue - drain blkcg part of request_queue |
856 | * @q: request_queue to drain | 854 | * @q: request_queue to drain |
857 | * | 855 | * |
858 | * Called from blk_drain_queue(). Responsible for draining blkcg part. | 856 | * Called from blk_drain_queue(). Responsible for draining blkcg part. |
859 | */ | 857 | */ |
860 | void blkcg_drain_queue(struct request_queue *q) | 858 | void blkcg_drain_queue(struct request_queue *q) |
861 | { | 859 | { |
862 | lockdep_assert_held(q->queue_lock); | 860 | lockdep_assert_held(q->queue_lock); |
863 | 861 | ||
864 | blk_throtl_drain(q); | 862 | blk_throtl_drain(q); |
865 | } | 863 | } |
866 | 864 | ||
867 | /** | 865 | /** |
868 | * blkcg_exit_queue - exit and release blkcg part of request_queue | 866 | * blkcg_exit_queue - exit and release blkcg part of request_queue |
869 | * @q: request_queue being released | 867 | * @q: request_queue being released |
870 | * | 868 | * |
871 | * Called from blk_release_queue(). Responsible for exiting blkcg part. | 869 | * Called from blk_release_queue(). Responsible for exiting blkcg part. |
872 | */ | 870 | */ |
873 | void blkcg_exit_queue(struct request_queue *q) | 871 | void blkcg_exit_queue(struct request_queue *q) |
874 | { | 872 | { |
875 | spin_lock_irq(q->queue_lock); | 873 | spin_lock_irq(q->queue_lock); |
876 | blkg_destroy_all(q); | 874 | blkg_destroy_all(q); |
877 | spin_unlock_irq(q->queue_lock); | 875 | spin_unlock_irq(q->queue_lock); |
878 | 876 | ||
879 | blk_throtl_exit(q); | 877 | blk_throtl_exit(q); |
880 | } | 878 | } |
881 | 879 | ||
882 | /* | 880 | /* |
883 | * We cannot support shared io contexts, as we have no means to support | 881 | * We cannot support shared io contexts, as we have no means to support |
884 | * two tasks with the same ioc in two different groups without major rework | 882 | * two tasks with the same ioc in two different groups without major rework |
885 | * of the main cic data structures. For now we allow a task to change | 883 | * of the main cic data structures. For now we allow a task to change |
886 | * its cgroup only if it's the only owner of its ioc. | 884 | * its cgroup only if it's the only owner of its ioc. |
887 | */ | 885 | */ |
888 | static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 886 | static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
889 | { | 887 | { |
890 | struct task_struct *task; | 888 | struct task_struct *task; |
891 | struct io_context *ioc; | 889 | struct io_context *ioc; |
892 | int ret = 0; | 890 | int ret = 0; |
893 | 891 | ||
894 | /* task_lock() is needed to avoid races with exit_io_context() */ | 892 | /* task_lock() is needed to avoid races with exit_io_context() */ |
895 | cgroup_taskset_for_each(task, cgrp, tset) { | 893 | cgroup_taskset_for_each(task, cgrp, tset) { |
896 | task_lock(task); | 894 | task_lock(task); |
897 | ioc = task->io_context; | 895 | ioc = task->io_context; |
898 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) | 896 | if (ioc && atomic_read(&ioc->nr_tasks) > 1) |
899 | ret = -EINVAL; | 897 | ret = -EINVAL; |
900 | task_unlock(task); | 898 | task_unlock(task); |
901 | if (ret) | 899 | if (ret) |
902 | break; | 900 | break; |
903 | } | 901 | } |
904 | return ret; | 902 | return ret; |
905 | } | 903 | } |
906 | 904 | ||
907 | struct cgroup_subsys blkio_subsys = { | 905 | struct cgroup_subsys blkio_subsys = { |
908 | .name = "blkio", | 906 | .name = "blkio", |
909 | .css_alloc = blkcg_css_alloc, | 907 | .css_alloc = blkcg_css_alloc, |
910 | .css_offline = blkcg_css_offline, | 908 | .css_offline = blkcg_css_offline, |
911 | .css_free = blkcg_css_free, | 909 | .css_free = blkcg_css_free, |
912 | .can_attach = blkcg_can_attach, | 910 | .can_attach = blkcg_can_attach, |
913 | .subsys_id = blkio_subsys_id, | 911 | .subsys_id = blkio_subsys_id, |
914 | .base_cftypes = blkcg_files, | 912 | .base_cftypes = blkcg_files, |
915 | .module = THIS_MODULE, | 913 | .module = THIS_MODULE, |
916 | 914 | ||
917 | /* | 915 | /* |
918 | * blkio subsystem is utterly broken in terms of hierarchy support. | 916 | * blkio subsystem is utterly broken in terms of hierarchy support. |
919 | * It treats all cgroups equally regardless of where they're | 917 | * It treats all cgroups equally regardless of where they're |
920 | * located in the hierarchy - all cgroups are treated as if they're | 918 | * located in the hierarchy - all cgroups are treated as if they're |
921 | * right below the root. Fix it and remove the following. | 919 | * right below the root. Fix it and remove the following. |
922 | */ | 920 | */ |
923 | .broken_hierarchy = true, | 921 | .broken_hierarchy = true, |
924 | }; | 922 | }; |
925 | EXPORT_SYMBOL_GPL(blkio_subsys); | 923 | EXPORT_SYMBOL_GPL(blkio_subsys); |
926 | 924 | ||
927 | /** | 925 | /** |
928 | * blkcg_activate_policy - activate a blkcg policy on a request_queue | 926 | * blkcg_activate_policy - activate a blkcg policy on a request_queue |
929 | * @q: request_queue of interest | 927 | * @q: request_queue of interest |
930 | * @pol: blkcg policy to activate | 928 | * @pol: blkcg policy to activate |
931 | * | 929 | * |
932 | * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through | 930 | * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through |
933 | * bypass mode to populate its blkgs with policy_data for @pol. | 931 | * bypass mode to populate its blkgs with policy_data for @pol. |
934 | * | 932 | * |
935 | * Activation happens with @q bypassed, so nobody would be accessing blkgs | 933 | * Activation happens with @q bypassed, so nobody would be accessing blkgs |
936 | * from IO path. Update of each blkg is protected by both queue and blkcg | 934 | * from IO path. Update of each blkg is protected by both queue and blkcg |
937 | * locks so that holding either lock and testing blkcg_policy_enabled() is | 935 | * locks so that holding either lock and testing blkcg_policy_enabled() is |
938 | * always enough for dereferencing policy data. | 936 | * always enough for dereferencing policy data. |
939 | * | 937 | * |
940 | * The caller is responsible for synchronizing [de]activations and policy | 938 | * The caller is responsible for synchronizing [de]activations and policy |
941 | * [un]registrations. Returns 0 on success, -errno on failure. | 939 | * [un]registrations. Returns 0 on success, -errno on failure. |
942 | */ | 940 | */ |
943 | int blkcg_activate_policy(struct request_queue *q, | 941 | int blkcg_activate_policy(struct request_queue *q, |
944 | const struct blkcg_policy *pol) | 942 | const struct blkcg_policy *pol) |
945 | { | 943 | { |
946 | LIST_HEAD(pds); | 944 | LIST_HEAD(pds); |
947 | struct blkcg_gq *blkg, *new_blkg; | 945 | struct blkcg_gq *blkg, *new_blkg; |
948 | struct blkg_policy_data *pd, *n; | 946 | struct blkg_policy_data *pd, *n; |
949 | int cnt = 0, ret; | 947 | int cnt = 0, ret; |
950 | bool preloaded; | 948 | bool preloaded; |
951 | 949 | ||
952 | if (blkcg_policy_enabled(q, pol)) | 950 | if (blkcg_policy_enabled(q, pol)) |
953 | return 0; | 951 | return 0; |
954 | 952 | ||
955 | /* preallocations for root blkg */ | 953 | /* preallocations for root blkg */ |
956 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | 954 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); |
957 | if (!new_blkg) | 955 | if (!new_blkg) |
958 | return -ENOMEM; | 956 | return -ENOMEM; |
959 | 957 | ||
960 | blk_queue_bypass_start(q); | 958 | blk_queue_bypass_start(q); |
961 | 959 | ||
962 | preloaded = !radix_tree_preload(GFP_KERNEL); | 960 | preloaded = !radix_tree_preload(GFP_KERNEL); |
963 | 961 | ||
964 | /* | 962 | /* |
965 | * Make sure the root blkg exists and count the existing blkgs. As | 963 | * Make sure the root blkg exists and count the existing blkgs. As |
966 | * @q is bypassing at this point, blkg_lookup_create() can't be | 964 | * @q is bypassing at this point, blkg_lookup_create() can't be |
967 | * used. Open code it. | 965 | * used. Open code it. |
968 | */ | 966 | */ |
969 | spin_lock_irq(q->queue_lock); | 967 | spin_lock_irq(q->queue_lock); |
970 | 968 | ||
971 | rcu_read_lock(); | 969 | rcu_read_lock(); |
972 | blkg = __blkg_lookup(&blkcg_root, q, false); | 970 | blkg = __blkg_lookup(&blkcg_root, q, false); |
973 | if (blkg) | 971 | if (blkg) |
974 | blkg_free(new_blkg); | 972 | blkg_free(new_blkg); |
975 | else | 973 | else |
976 | blkg = blkg_create(&blkcg_root, q, new_blkg); | 974 | blkg = blkg_create(&blkcg_root, q, new_blkg); |
977 | rcu_read_unlock(); | 975 | rcu_read_unlock(); |
978 | 976 | ||
979 | if (preloaded) | 977 | if (preloaded) |
980 | radix_tree_preload_end(); | 978 | radix_tree_preload_end(); |
981 | 979 | ||
982 | if (IS_ERR(blkg)) { | 980 | if (IS_ERR(blkg)) { |
983 | ret = PTR_ERR(blkg); | 981 | ret = PTR_ERR(blkg); |
984 | goto out_unlock; | 982 | goto out_unlock; |
985 | } | 983 | } |
986 | q->root_blkg = blkg; | 984 | q->root_blkg = blkg; |
987 | q->root_rl.blkg = blkg; | 985 | q->root_rl.blkg = blkg; |
988 | 986 | ||
989 | list_for_each_entry(blkg, &q->blkg_list, q_node) | 987 | list_for_each_entry(blkg, &q->blkg_list, q_node) |
990 | cnt++; | 988 | cnt++; |
991 | 989 | ||
992 | spin_unlock_irq(q->queue_lock); | 990 | spin_unlock_irq(q->queue_lock); |
993 | 991 | ||
994 | /* allocate policy_data for all existing blkgs */ | 992 | /* allocate policy_data for all existing blkgs */ |
995 | while (cnt--) { | 993 | while (cnt--) { |
996 | pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); | 994 | pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); |
997 | if (!pd) { | 995 | if (!pd) { |
998 | ret = -ENOMEM; | 996 | ret = -ENOMEM; |
999 | goto out_free; | 997 | goto out_free; |
1000 | } | 998 | } |
1001 | list_add_tail(&pd->alloc_node, &pds); | 999 | list_add_tail(&pd->alloc_node, &pds); |
1002 | } | 1000 | } |
1003 | 1001 | ||
1004 | /* | 1002 | /* |
1005 | * Install the allocated pds. With @q bypassing, no new blkg | 1003 | * Install the allocated pds. With @q bypassing, no new blkg |
1006 | * should have been created while the queue lock was dropped. | 1004 | * should have been created while the queue lock was dropped. |
1007 | */ | 1005 | */ |
1008 | spin_lock_irq(q->queue_lock); | 1006 | spin_lock_irq(q->queue_lock); |
1009 | 1007 | ||
1010 | list_for_each_entry(blkg, &q->blkg_list, q_node) { | 1008 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
1011 | if (WARN_ON(list_empty(&pds))) { | 1009 | if (WARN_ON(list_empty(&pds))) { |
1012 | /* umm... this shouldn't happen, just abort */ | 1010 | /* umm... this shouldn't happen, just abort */ |
1013 | ret = -ENOMEM; | 1011 | ret = -ENOMEM; |
1014 | goto out_unlock; | 1012 | goto out_unlock; |
1015 | } | 1013 | } |
1016 | pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); | 1014 | pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); |
1017 | list_del_init(&pd->alloc_node); | 1015 | list_del_init(&pd->alloc_node); |
1018 | 1016 | ||
1019 | /* grab blkcg lock too while installing @pd on @blkg */ | 1017 | /* grab blkcg lock too while installing @pd on @blkg */ |
1020 | spin_lock(&blkg->blkcg->lock); | 1018 | spin_lock(&blkg->blkcg->lock); |
1021 | 1019 | ||
1022 | blkg->pd[pol->plid] = pd; | 1020 | blkg->pd[pol->plid] = pd; |
1023 | pd->blkg = blkg; | 1021 | pd->blkg = blkg; |
1024 | pd->plid = pol->plid; | 1022 | pd->plid = pol->plid; |
1025 | pol->pd_init_fn(blkg); | 1023 | pol->pd_init_fn(blkg); |
1026 | 1024 | ||
1027 | spin_unlock(&blkg->blkcg->lock); | 1025 | spin_unlock(&blkg->blkcg->lock); |
1028 | } | 1026 | } |
1029 | 1027 | ||
1030 | __set_bit(pol->plid, q->blkcg_pols); | 1028 | __set_bit(pol->plid, q->blkcg_pols); |
1031 | ret = 0; | 1029 | ret = 0; |
1032 | out_unlock: | 1030 | out_unlock: |
1033 | spin_unlock_irq(q->queue_lock); | 1031 | spin_unlock_irq(q->queue_lock); |
1034 | out_free: | 1032 | out_free: |
1035 | blk_queue_bypass_end(q); | 1033 | blk_queue_bypass_end(q); |
1036 | list_for_each_entry_safe(pd, n, &pds, alloc_node) | 1034 | list_for_each_entry_safe(pd, n, &pds, alloc_node) |
1037 | kfree(pd); | 1035 | kfree(pd); |
1038 | return ret; | 1036 | return ret; |
1039 | } | 1037 | } |
1040 | EXPORT_SYMBOL_GPL(blkcg_activate_policy); | 1038 | EXPORT_SYMBOL_GPL(blkcg_activate_policy); |
1041 | 1039 | ||
1042 | /** | 1040 | /** |
1043 | * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue | 1041 | * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue |
1044 | * @q: request_queue of interest | 1042 | * @q: request_queue of interest |
1045 | * @pol: blkcg policy to deactivate | 1043 | * @pol: blkcg policy to deactivate |
1046 | * | 1044 | * |
1047 | * Deactivate @pol on @q. Follows the same synchronization rules as | 1045 | * Deactivate @pol on @q. Follows the same synchronization rules as |
1048 | * blkcg_activate_policy(). | 1046 | * blkcg_activate_policy(). |
1049 | */ | 1047 | */ |
1050 | void blkcg_deactivate_policy(struct request_queue *q, | 1048 | void blkcg_deactivate_policy(struct request_queue *q, |
1051 | const struct blkcg_policy *pol) | 1049 | const struct blkcg_policy *pol) |
1052 | { | 1050 | { |
1053 | struct blkcg_gq *blkg; | 1051 | struct blkcg_gq *blkg; |
1054 | 1052 | ||
1055 | if (!blkcg_policy_enabled(q, pol)) | 1053 | if (!blkcg_policy_enabled(q, pol)) |
1056 | return; | 1054 | return; |
1057 | 1055 | ||
1058 | blk_queue_bypass_start(q); | 1056 | blk_queue_bypass_start(q); |
1059 | spin_lock_irq(q->queue_lock); | 1057 | spin_lock_irq(q->queue_lock); |
1060 | 1058 | ||
1061 | __clear_bit(pol->plid, q->blkcg_pols); | 1059 | __clear_bit(pol->plid, q->blkcg_pols); |
1062 | 1060 | ||
1063 | /* if no policy is left, no need for blkgs - shoot them down */ | 1061 | /* if no policy is left, no need for blkgs - shoot them down */ |
1064 | if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) | 1062 | if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) |
1065 | blkg_destroy_all(q); | 1063 | blkg_destroy_all(q); |
1066 | 1064 | ||
1067 | list_for_each_entry(blkg, &q->blkg_list, q_node) { | 1065 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
1068 | /* grab blkcg lock too while removing @pd from @blkg */ | 1066 | /* grab blkcg lock too while removing @pd from @blkg */ |
1069 | spin_lock(&blkg->blkcg->lock); | 1067 | spin_lock(&blkg->blkcg->lock); |
1070 | 1068 | ||
1071 | if (pol->pd_offline_fn) | 1069 | if (pol->pd_offline_fn) |
1072 | pol->pd_offline_fn(blkg); | 1070 | pol->pd_offline_fn(blkg); |
1073 | if (pol->pd_exit_fn) | 1071 | if (pol->pd_exit_fn) |
1074 | pol->pd_exit_fn(blkg); | 1072 | pol->pd_exit_fn(blkg); |
1075 | 1073 | ||
1076 | kfree(blkg->pd[pol->plid]); | 1074 | kfree(blkg->pd[pol->plid]); |
1077 | blkg->pd[pol->plid] = NULL; | 1075 | blkg->pd[pol->plid] = NULL; |
1078 | 1076 | ||
1079 | spin_unlock(&blkg->blkcg->lock); | 1077 | spin_unlock(&blkg->blkcg->lock); |
1080 | } | 1078 | } |
1081 | 1079 | ||
1082 | spin_unlock_irq(q->queue_lock); | 1080 | spin_unlock_irq(q->queue_lock); |
1083 | blk_queue_bypass_end(q); | 1081 | blk_queue_bypass_end(q); |
1084 | } | 1082 | } |
1085 | EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); | 1083 | EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); |
1086 | 1084 | ||
1087 | /** | 1085 | /** |
1088 | * blkcg_policy_register - register a blkcg policy | 1086 | * blkcg_policy_register - register a blkcg policy |
1089 | * @pol: blkcg policy to register | 1087 | * @pol: blkcg policy to register |
1090 | * | 1088 | * |
1091 | * Register @pol with blkcg core. Might sleep and @pol may be modified on | 1089 | * Register @pol with blkcg core. Might sleep and @pol may be modified on |
1092 | * successful registration. Returns 0 on success and -errno on failure. | 1090 | * successful registration. Returns 0 on success and -errno on failure. |
1093 | */ | 1091 | */ |
1094 | int blkcg_policy_register(struct blkcg_policy *pol) | 1092 | int blkcg_policy_register(struct blkcg_policy *pol) |
1095 | { | 1093 | { |
1096 | int i, ret; | 1094 | int i, ret; |
1097 | 1095 | ||
1098 | if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) | 1096 | if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) |
1099 | return -EINVAL; | 1097 | return -EINVAL; |
1100 | 1098 | ||
1101 | mutex_lock(&blkcg_pol_mutex); | 1099 | mutex_lock(&blkcg_pol_mutex); |
1102 | 1100 | ||
1103 | /* find an empty slot */ | 1101 | /* find an empty slot */ |
1104 | ret = -ENOSPC; | 1102 | ret = -ENOSPC; |
1105 | for (i = 0; i < BLKCG_MAX_POLS; i++) | 1103 | for (i = 0; i < BLKCG_MAX_POLS; i++) |
1106 | if (!blkcg_policy[i]) | 1104 | if (!blkcg_policy[i]) |
1107 | break; | 1105 | break; |
1108 | if (i >= BLKCG_MAX_POLS) | 1106 | if (i >= BLKCG_MAX_POLS) |
1109 | goto out_unlock; | 1107 | goto out_unlock; |
1110 | 1108 | ||
1111 | /* register and update blkgs */ | 1109 | /* register and update blkgs */ |
1112 | pol->plid = i; | 1110 | pol->plid = i; |
1113 | blkcg_policy[i] = pol; | 1111 | blkcg_policy[i] = pol; |
1114 | 1112 | ||
1115 | /* everything is in place, add intf files for the new policy */ | 1113 | /* everything is in place, add intf files for the new policy */ |
1116 | if (pol->cftypes) | 1114 | if (pol->cftypes) |
1117 | WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes)); | 1115 | WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes)); |
1118 | ret = 0; | 1116 | ret = 0; |
1119 | out_unlock: | 1117 | out_unlock: |
1120 | mutex_unlock(&blkcg_pol_mutex); | 1118 | mutex_unlock(&blkcg_pol_mutex); |
1121 | return ret; | 1119 | return ret; |
1122 | } | 1120 | } |
1123 | EXPORT_SYMBOL_GPL(blkcg_policy_register); | 1121 | EXPORT_SYMBOL_GPL(blkcg_policy_register); |
1124 | 1122 | ||
1125 | /** | 1123 | /** |
1126 | * blkcg_policy_unregister - unregister a blkcg policy | 1124 | * blkcg_policy_unregister - unregister a blkcg policy |
1127 | * @pol: blkcg policy to unregister | 1125 | * @pol: blkcg policy to unregister |
1128 | * | 1126 | * |
1129 | * Undo blkcg_policy_register(@pol). Might sleep. | 1127 | * Undo blkcg_policy_register(@pol). Might sleep. |
1130 | */ | 1128 | */ |
1131 | void blkcg_policy_unregister(struct blkcg_policy *pol) | 1129 | void blkcg_policy_unregister(struct blkcg_policy *pol) |
1132 | { | 1130 | { |
1133 | mutex_lock(&blkcg_pol_mutex); | 1131 | mutex_lock(&blkcg_pol_mutex); |
1134 | 1132 | ||
1135 | if (WARN_ON(blkcg_policy[pol->plid] != pol)) | 1133 | if (WARN_ON(blkcg_policy[pol->plid] != pol)) |
1136 | goto out_unlock; | 1134 | goto out_unlock; |
1137 | 1135 |
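
For orientation, here is a minimal sketch of how a policy could plug into the registration and activation entry points diffed above (blkcg_policy_register() and blkcg_activate_policy()). Everything named example_* and the nr_dispatched field are hypothetical illustrations, not part of this commit; only the struct blkcg_policy fields and the two calls come from the code shown.

/* Hypothetical policy sketch -- builds against the blk-cgroup.h diffed below. */
#include "blk-cgroup.h"

struct example_pd {
	struct blkg_policy_data pd;	/* must be embedded at the start */
	u64 nr_dispatched;		/* hypothetical per-blkg counter */
};

static struct blkcg_policy example_policy;

/* Called by blkcg core for each blkg once its pd has been installed. */
static void example_pd_init(struct blkcg_gq *blkg)
{
	struct example_pd *epd =
		container_of(blkg_to_pd(blkg, &example_policy),
			     struct example_pd, pd);

	epd->nr_dispatched = 0;
}

static struct blkcg_policy example_policy = {
	.pd_size	= sizeof(struct example_pd),
	.pd_init_fn	= example_pd_init,
};

static int example_setup(void)
{
	int ret;

	/* Claim a plid slot and add any cgroup interface files. */
	ret = blkcg_policy_register(&example_policy);
	if (ret)
		return ret;

	/*
	 * Per request_queue, something like the following would then
	 * allocate and install the per-blkg policy data:
	 *
	 *	ret = blkcg_activate_policy(q, &example_policy);
	 */
	return 0;
}
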
block/blk-cgroup.h
1 | #ifndef _BLK_CGROUP_H | 1 | #ifndef _BLK_CGROUP_H |
2 | #define _BLK_CGROUP_H | 2 | #define _BLK_CGROUP_H |
3 | /* | 3 | /* |
4 | * Common Block IO controller cgroup interface | 4 | * Common Block IO controller cgroup interface |
5 | * | 5 | * |
6 | * Based on ideas and code from CFQ, CFS and BFQ: | 6 | * Based on ideas and code from CFQ, CFS and BFQ: |
7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | 7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> |
8 | * | 8 | * |
9 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | 9 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> |
10 | * Paolo Valente <paolo.valente@unimore.it> | 10 | * Paolo Valente <paolo.valente@unimore.it> |
11 | * | 11 | * |
12 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | 12 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> |
13 | * Nauman Rafique <nauman@google.com> | 13 | * Nauman Rafique <nauman@google.com> |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/cgroup.h> | 16 | #include <linux/cgroup.h> |
17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/u64_stats_sync.h> |
18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
20 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
21 | 21 | ||
22 | /* Max limits for throttle policy */ | 22 | /* Max limits for throttle policy */ |
23 | #define THROTL_IOPS_MAX UINT_MAX | 23 | #define THROTL_IOPS_MAX UINT_MAX |
24 | 24 | ||
25 | /* CFQ specific, out here for blkcg->cfq_weight */ | 25 | /* CFQ specific, out here for blkcg->cfq_weight */ |
26 | #define CFQ_WEIGHT_MIN 10 | 26 | #define CFQ_WEIGHT_MIN 10 |
27 | #define CFQ_WEIGHT_MAX 1000 | 27 | #define CFQ_WEIGHT_MAX 1000 |
28 | #define CFQ_WEIGHT_DEFAULT 500 | 28 | #define CFQ_WEIGHT_DEFAULT 500 |
29 | 29 | ||
30 | #ifdef CONFIG_BLK_CGROUP | 30 | #ifdef CONFIG_BLK_CGROUP |
31 | 31 | ||
32 | enum blkg_rwstat_type { | 32 | enum blkg_rwstat_type { |
33 | BLKG_RWSTAT_READ, | 33 | BLKG_RWSTAT_READ, |
34 | BLKG_RWSTAT_WRITE, | 34 | BLKG_RWSTAT_WRITE, |
35 | BLKG_RWSTAT_SYNC, | 35 | BLKG_RWSTAT_SYNC, |
36 | BLKG_RWSTAT_ASYNC, | 36 | BLKG_RWSTAT_ASYNC, |
37 | 37 | ||
38 | BLKG_RWSTAT_NR, | 38 | BLKG_RWSTAT_NR, |
39 | BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, | 39 | BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, |
40 | }; | 40 | }; |
41 | 41 | ||
42 | struct blkcg_gq; | 42 | struct blkcg_gq; |
43 | 43 | ||
44 | struct blkcg { | 44 | struct blkcg { |
45 | struct cgroup_subsys_state css; | 45 | struct cgroup_subsys_state css; |
46 | spinlock_t lock; | 46 | spinlock_t lock; |
47 | 47 | ||
48 | struct radix_tree_root blkg_tree; | 48 | struct radix_tree_root blkg_tree; |
49 | struct blkcg_gq *blkg_hint; | 49 | struct blkcg_gq *blkg_hint; |
50 | struct hlist_head blkg_list; | 50 | struct hlist_head blkg_list; |
51 | 51 | ||
52 | /* for policies to test whether associated blkcg has changed */ | 52 | /* for policies to test whether associated blkcg has changed */ |
53 | uint64_t id; | 53 | uint64_t id; |
54 | 54 | ||
55 | /* TODO: per-policy storage in blkcg */ | 55 | /* TODO: per-policy storage in blkcg */ |
56 | unsigned int cfq_weight; /* belongs to cfq */ | 56 | unsigned int cfq_weight; /* belongs to cfq */ |
57 | unsigned int cfq_leaf_weight; | 57 | unsigned int cfq_leaf_weight; |
58 | }; | 58 | }; |
59 | 59 | ||
60 | struct blkg_stat { | 60 | struct blkg_stat { |
61 | struct u64_stats_sync syncp; | 61 | struct u64_stats_sync syncp; |
62 | uint64_t cnt; | 62 | uint64_t cnt; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | struct blkg_rwstat { | 65 | struct blkg_rwstat { |
66 | struct u64_stats_sync syncp; | 66 | struct u64_stats_sync syncp; |
67 | uint64_t cnt[BLKG_RWSTAT_NR]; | 67 | uint64_t cnt[BLKG_RWSTAT_NR]; |
68 | }; | 68 | }; |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a | 71 | * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a |
72 | * request_queue (q). This is used by blkcg policies which need to track | 72 | * request_queue (q). This is used by blkcg policies which need to track |
73 | * information per blkcg - q pair. | 73 | * information per blkcg - q pair. |
74 | * | 74 | * |
75 | * There can be multiple active blkcg policies and each has its private | 75 | * There can be multiple active blkcg policies and each has its private |
76 | * data on each blkg, the size of which is determined by | 76 | * data on each blkg, the size of which is determined by |
77 | * blkcg_policy->pd_size. blkcg core allocates and frees such areas | 77 | * blkcg_policy->pd_size. blkcg core allocates and frees such areas |
78 | * together with blkg and invokes pd_init/exit_fn() methods. | 78 | * together with blkg and invokes pd_init/exit_fn() methods. |
79 | * | 79 | * |
80 | * Such private data must embed struct blkg_policy_data (pd) at the | 80 | * Such private data must embed struct blkg_policy_data (pd) at the |
81 | * beginning and pd_size can't be smaller than pd. | 81 | * beginning and pd_size can't be smaller than pd. |
82 | */ | 82 | */ |
83 | struct blkg_policy_data { | 83 | struct blkg_policy_data { |
84 | /* the blkg and policy id this per-policy data belongs to */ | 84 | /* the blkg and policy id this per-policy data belongs to */ |
85 | struct blkcg_gq *blkg; | 85 | struct blkcg_gq *blkg; |
86 | int plid; | 86 | int plid; |
87 | 87 | ||
88 | /* used during policy activation */ | 88 | /* used during policy activation */ |
89 | struct list_head alloc_node; | 89 | struct list_head alloc_node; |
90 | }; | 90 | }; |
91 | 91 | ||
92 | /* association between a blk cgroup and a request queue */ | 92 | /* association between a blk cgroup and a request queue */ |
93 | struct blkcg_gq { | 93 | struct blkcg_gq { |
94 | /* Pointer to the associated request_queue */ | 94 | /* Pointer to the associated request_queue */ |
95 | struct request_queue *q; | 95 | struct request_queue *q; |
96 | struct list_head q_node; | 96 | struct list_head q_node; |
97 | struct hlist_node blkcg_node; | 97 | struct hlist_node blkcg_node; |
98 | struct blkcg *blkcg; | 98 | struct blkcg *blkcg; |
99 | 99 | ||
100 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ | 100 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ |
101 | struct blkcg_gq *parent; | 101 | struct blkcg_gq *parent; |
102 | 102 | ||
103 | /* request allocation list for this blkcg-q pair */ | 103 | /* request allocation list for this blkcg-q pair */ |
104 | struct request_list rl; | 104 | struct request_list rl; |
105 | 105 | ||
106 | /* reference count */ | 106 | /* reference count */ |
107 | int refcnt; | 107 | int refcnt; |
108 | 108 | ||
109 | /* is this blkg online? protected by both blkcg and q locks */ | 109 | /* is this blkg online? protected by both blkcg and q locks */ |
110 | bool online; | 110 | bool online; |
111 | 111 | ||
112 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | 112 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; |
113 | 113 | ||
114 | struct rcu_head rcu_head; | 114 | struct rcu_head rcu_head; |
115 | }; | 115 | }; |
116 | 116 | ||
117 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | 117 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); |
118 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | 118 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); |
119 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | 119 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); |
120 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); | 120 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); |
121 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); | 121 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); |
122 | 122 | ||
123 | struct blkcg_policy { | 123 | struct blkcg_policy { |
124 | int plid; | 124 | int plid; |
125 | /* policy specific private data size */ | 125 | /* policy specific private data size */ |
126 | size_t pd_size; | 126 | size_t pd_size; |
127 | /* cgroup files for the policy */ | 127 | /* cgroup files for the policy */ |
128 | struct cftype *cftypes; | 128 | struct cftype *cftypes; |
129 | 129 | ||
130 | /* operations */ | 130 | /* operations */ |
131 | blkcg_pol_init_pd_fn *pd_init_fn; | 131 | blkcg_pol_init_pd_fn *pd_init_fn; |
132 | blkcg_pol_online_pd_fn *pd_online_fn; | 132 | blkcg_pol_online_pd_fn *pd_online_fn; |
133 | blkcg_pol_offline_pd_fn *pd_offline_fn; | 133 | blkcg_pol_offline_pd_fn *pd_offline_fn; |
134 | blkcg_pol_exit_pd_fn *pd_exit_fn; | 134 | blkcg_pol_exit_pd_fn *pd_exit_fn; |
135 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; | 135 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; |
136 | }; | 136 | }; |
137 | 137 | ||
138 | extern struct blkcg blkcg_root; | 138 | extern struct blkcg blkcg_root; |
139 | 139 | ||
140 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | 140 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); |
141 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 141 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
142 | struct request_queue *q); | 142 | struct request_queue *q); |
143 | int blkcg_init_queue(struct request_queue *q); | 143 | int blkcg_init_queue(struct request_queue *q); |
144 | void blkcg_drain_queue(struct request_queue *q); | 144 | void blkcg_drain_queue(struct request_queue *q); |
145 | void blkcg_exit_queue(struct request_queue *q); | 145 | void blkcg_exit_queue(struct request_queue *q); |
146 | 146 | ||
147 | /* Blkio controller policy registration */ | 147 | /* Blkio controller policy registration */ |
148 | int blkcg_policy_register(struct blkcg_policy *pol); | 148 | int blkcg_policy_register(struct blkcg_policy *pol); |
149 | void blkcg_policy_unregister(struct blkcg_policy *pol); | 149 | void blkcg_policy_unregister(struct blkcg_policy *pol); |
150 | int blkcg_activate_policy(struct request_queue *q, | 150 | int blkcg_activate_policy(struct request_queue *q, |
151 | const struct blkcg_policy *pol); | 151 | const struct blkcg_policy *pol); |
152 | void blkcg_deactivate_policy(struct request_queue *q, | 152 | void blkcg_deactivate_policy(struct request_queue *q, |
153 | const struct blkcg_policy *pol); | 153 | const struct blkcg_policy *pol); |
154 | 154 | ||
155 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | 155 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, |
156 | u64 (*prfill)(struct seq_file *, | 156 | u64 (*prfill)(struct seq_file *, |
157 | struct blkg_policy_data *, int), | 157 | struct blkg_policy_data *, int), |
158 | const struct blkcg_policy *pol, int data, | 158 | const struct blkcg_policy *pol, int data, |
159 | bool show_total); | 159 | bool show_total); |
160 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); | 160 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); |
161 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 161 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
162 | const struct blkg_rwstat *rwstat); | 162 | const struct blkg_rwstat *rwstat); |
163 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); | 163 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); |
164 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 164 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
165 | int off); | 165 | int off); |
166 | 166 | ||
167 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); | 167 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); |
168 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | 168 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, |
169 | int off); | 169 | int off); |
170 | 170 | ||
171 | struct blkg_conf_ctx { | 171 | struct blkg_conf_ctx { |
172 | struct gendisk *disk; | 172 | struct gendisk *disk; |
173 | struct blkcg_gq *blkg; | 173 | struct blkcg_gq *blkg; |
174 | u64 v; | 174 | u64 v; |
175 | }; | 175 | }; |
176 | 176 | ||
177 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | 177 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, |
178 | const char *input, struct blkg_conf_ctx *ctx); | 178 | const char *input, struct blkg_conf_ctx *ctx); |
179 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | 179 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); |
180 | 180 | ||
181 | 181 | ||
182 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) | 182 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) |
183 | { | 183 | { |
184 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), | 184 | return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), |
185 | struct blkcg, css); | 185 | struct blkcg, css); |
186 | } | 186 | } |
187 | 187 | ||
188 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | 188 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) |
189 | { | 189 | { |
190 | return container_of(task_subsys_state(tsk, blkio_subsys_id), | 190 | return container_of(task_subsys_state(tsk, blkio_subsys_id), |
191 | struct blkcg, css); | 191 | struct blkcg, css); |
192 | } | 192 | } |
193 | 193 | ||
194 | static inline struct blkcg *bio_blkcg(struct bio *bio) | 194 | static inline struct blkcg *bio_blkcg(struct bio *bio) |
195 | { | 195 | { |
196 | if (bio && bio->bi_css) | 196 | if (bio && bio->bi_css) |
197 | return container_of(bio->bi_css, struct blkcg, css); | 197 | return container_of(bio->bi_css, struct blkcg, css); |
198 | return task_blkcg(current); | 198 | return task_blkcg(current); |
199 | } | 199 | } |
200 | 200 | ||
201 | /** | 201 | /** |
202 | * blkcg_parent - get the parent of a blkcg | 202 | * blkcg_parent - get the parent of a blkcg |
203 | * @blkcg: blkcg of interest | 203 | * @blkcg: blkcg of interest |
204 | * | 204 | * |
205 | * Return the parent blkcg of @blkcg. Can be called anytime. | 205 | * Return the parent blkcg of @blkcg. Can be called anytime. |
206 | */ | 206 | */ |
207 | static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) | 207 | static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) |
208 | { | 208 | { |
209 | struct cgroup *pcg = blkcg->css.cgroup->parent; | 209 | struct cgroup *pcg = blkcg->css.cgroup->parent; |
210 | 210 | ||
211 | return pcg ? cgroup_to_blkcg(pcg) : NULL; | 211 | return pcg ? cgroup_to_blkcg(pcg) : NULL; |
212 | } | 212 | } |
213 | 213 | ||
214 | /** | 214 | /** |
215 | * blkg_to_pd - get policy private data | 215 | * blkg_to_pd - get policy private data |
216 | * @blkg: blkg of interest | 216 | * @blkg: blkg of interest |
217 | * @pol: policy of interest | 217 | * @pol: policy of interest |
218 | * | 218 | * |
219 | * Return pointer to private data associated with the @blkg-@pol pair. | 219 | * Return pointer to private data associated with the @blkg-@pol pair. |
220 | */ | 220 | */ |
221 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 221 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
222 | struct blkcg_policy *pol) | 222 | struct blkcg_policy *pol) |
223 | { | 223 | { |
224 | return blkg ? blkg->pd[pol->plid] : NULL; | 224 | return blkg ? blkg->pd[pol->plid] : NULL; |
225 | } | 225 | } |
226 | 226 | ||
227 | /** | 227 | /** |
228 | * pd_to_blkg - get blkg associated with policy private data | 228 | * pd_to_blkg - get blkg associated with policy private data |
229 | * @pd: policy private data of interest | 229 | * @pd: policy private data of interest |
230 | * | 230 | * |
231 | * @pd is policy private data. Determine the blkg it's associated with. | 231 | * @pd is policy private data. Determine the blkg it's associated with. |
232 | */ | 232 | */ |
233 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) | 233 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) |
234 | { | 234 | { |
235 | return pd ? pd->blkg : NULL; | 235 | return pd ? pd->blkg : NULL; |
236 | } | 236 | } |
237 | 237 | ||
238 | /** | 238 | /** |
239 | * blkg_path - format cgroup path of blkg | 239 | * blkg_path - format cgroup path of blkg |
240 | * @blkg: blkg of interest | 240 | * @blkg: blkg of interest |
241 | * @buf: target buffer | 241 | * @buf: target buffer |
242 | * @buflen: target buffer length | 242 | * @buflen: target buffer length |
243 | * | 243 | * |
244 | * Format the path of the cgroup of @blkg into @buf. | 244 | * Format the path of the cgroup of @blkg into @buf. |
245 | */ | 245 | */ |
246 | static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) | 246 | static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) |
247 | { | 247 | { |
248 | int ret; | 248 | int ret; |
249 | 249 | ||
250 | ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); | 250 | ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); |
251 | if (ret) | 251 | if (ret) |
252 | strncpy(buf, "<unavailable>", buflen); | 252 | strncpy(buf, "<unavailable>", buflen); |
253 | return ret; | 253 | return ret; |
254 | } | 254 | } |
255 | 255 | ||
256 | /** | 256 | /** |
257 | * blkg_get - get a blkg reference | 257 | * blkg_get - get a blkg reference |
258 | * @blkg: blkg to get | 258 | * @blkg: blkg to get |
259 | * | 259 | * |
260 | * The caller should be holding queue_lock and an existing reference. | 260 | * The caller should be holding queue_lock and an existing reference. |
261 | */ | 261 | */ |
262 | static inline void blkg_get(struct blkcg_gq *blkg) | 262 | static inline void blkg_get(struct blkcg_gq *blkg) |
263 | { | 263 | { |
264 | lockdep_assert_held(blkg->q->queue_lock); | 264 | lockdep_assert_held(blkg->q->queue_lock); |
265 | WARN_ON_ONCE(!blkg->refcnt); | 265 | WARN_ON_ONCE(!blkg->refcnt); |
266 | blkg->refcnt++; | 266 | blkg->refcnt++; |
267 | } | 267 | } |
268 | 268 | ||
269 | void __blkg_release(struct blkcg_gq *blkg); | 269 | void __blkg_release_rcu(struct rcu_head *rcu); |
270 | 270 | ||
271 | /** | 271 | /** |
272 | * blkg_put - put a blkg reference | 272 | * blkg_put - put a blkg reference |
273 | * @blkg: blkg to put | 273 | * @blkg: blkg to put |
274 | * | 274 | * |
275 | * The caller should be holding queue_lock. | 275 | * The caller should be holding queue_lock. |
276 | */ | 276 | */ |
277 | static inline void blkg_put(struct blkcg_gq *blkg) | 277 | static inline void blkg_put(struct blkcg_gq *blkg) |
278 | { | 278 | { |
279 | lockdep_assert_held(blkg->q->queue_lock); | 279 | lockdep_assert_held(blkg->q->queue_lock); |
280 | WARN_ON_ONCE(blkg->refcnt <= 0); | 280 | WARN_ON_ONCE(blkg->refcnt <= 0); |
281 | if (!--blkg->refcnt) | 281 | if (!--blkg->refcnt) |
282 | __blkg_release(blkg); | 282 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); |
283 | } | 283 | } |
284 | 284 | ||
285 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, | 285 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, |
286 | bool update_hint); | 286 | bool update_hint); |
287 | 287 | ||
288 | /** | 288 | /** |
289 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants | 289 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants |
290 | * @d_blkg: loop cursor pointing to the current descendant | 290 | * @d_blkg: loop cursor pointing to the current descendant |
291 | * @pos_cgrp: used for iteration | 291 | * @pos_cgrp: used for iteration |
292 | * @p_blkg: target blkg to walk descendants of | 292 | * @p_blkg: target blkg to walk descendants of |
293 | * | 293 | * |
294 | * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU | 294 | * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU |
295 | * read locked. If called under either blkcg or queue lock, the iteration | 295 | * read locked. If called under either blkcg or queue lock, the iteration |
296 | * is guaranteed to include all and only online blkgs. The caller may | 296 | * is guaranteed to include all and only online blkgs. The caller may |
297 | * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip | 297 | * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip |
298 | * subtree. | 298 | * subtree. |
299 | */ | 299 | */ |
300 | #define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ | 300 | #define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ |
301 | cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ | 301 | cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ |
302 | if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ | 302 | if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ |
303 | (p_blkg)->q, false))) | 303 | (p_blkg)->q, false))) |
304 | 304 | ||
305 | /** | 305 | /** |
306 | * blkg_for_each_descendant_post - post-order walk of a blkg's descendants | 306 | * blkg_for_each_descendant_post - post-order walk of a blkg's descendants |
307 | * @d_blkg: loop cursor pointing to the current descendant | 307 | * @d_blkg: loop cursor pointing to the current descendant |
308 | * @pos_cgrp: used for iteration | 308 | * @pos_cgrp: used for iteration |
309 | * @p_blkg: target blkg to walk descendants of | 309 | * @p_blkg: target blkg to walk descendants of |
310 | * | 310 | * |
311 | * Similar to blkg_for_each_descendant_pre() but performs post-order | 311 | * Similar to blkg_for_each_descendant_pre() but performs post-order |
312 | * traversal instead. Synchronization rules are the same. | 312 | * traversal instead. Synchronization rules are the same. |
313 | */ | 313 | */ |
314 | #define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \ | 314 | #define blkg_for_each_descendant_post(d_blkg, pos_cgrp, p_blkg) \ |
315 | cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ | 315 | cgroup_for_each_descendant_post((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ |
316 | if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ | 316 | if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ |
317 | (p_blkg)->q, false))) | 317 | (p_blkg)->q, false))) |
318 | 318 | ||
319 | /** | 319 | /** |
320 | * blk_get_rl - get request_list to use | 320 | * blk_get_rl - get request_list to use |
321 | * @q: request_queue of interest | 321 | * @q: request_queue of interest |
322 | * @bio: bio which will be attached to the allocated request (may be %NULL) | 322 | * @bio: bio which will be attached to the allocated request (may be %NULL) |
323 | * | 323 | * |
324 | * The caller wants to allocate a request from @q to use for @bio. Find | 324 | * The caller wants to allocate a request from @q to use for @bio. Find |
325 | * the request_list to use and obtain a reference on it. Should be called | 325 | * the request_list to use and obtain a reference on it. Should be called |
326 | * under queue_lock. This function is guaranteed to return non-%NULL | 326 | * under queue_lock. This function is guaranteed to return non-%NULL |
327 | * request_list. | 327 | * request_list. |
328 | */ | 328 | */ |
329 | static inline struct request_list *blk_get_rl(struct request_queue *q, | 329 | static inline struct request_list *blk_get_rl(struct request_queue *q, |
330 | struct bio *bio) | 330 | struct bio *bio) |
331 | { | 331 | { |
332 | struct blkcg *blkcg; | 332 | struct blkcg *blkcg; |
333 | struct blkcg_gq *blkg; | 333 | struct blkcg_gq *blkg; |
334 | 334 | ||
335 | rcu_read_lock(); | 335 | rcu_read_lock(); |
336 | 336 | ||
337 | blkcg = bio_blkcg(bio); | 337 | blkcg = bio_blkcg(bio); |
338 | 338 | ||
339 | /* bypass blkg lookup and use @q->root_rl directly for root */ | 339 | /* bypass blkg lookup and use @q->root_rl directly for root */ |
340 | if (blkcg == &blkcg_root) | 340 | if (blkcg == &blkcg_root) |
341 | goto root_rl; | 341 | goto root_rl; |
342 | 342 | ||
343 | /* | 343 | /* |
344 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | 344 | * Try to use blkg->rl. blkg lookup may fail under memory pressure |
345 | * or if either the blkcg or queue is going away. Fall back to | 345 | * or if either the blkcg or queue is going away. Fall back to |
346 | * root_rl in such cases. | 346 | * root_rl in such cases. |
347 | */ | 347 | */ |
348 | blkg = blkg_lookup_create(blkcg, q); | 348 | blkg = blkg_lookup_create(blkcg, q); |
349 | if (unlikely(IS_ERR(blkg))) | 349 | if (unlikely(IS_ERR(blkg))) |
350 | goto root_rl; | 350 | goto root_rl; |
351 | 351 | ||
352 | blkg_get(blkg); | 352 | blkg_get(blkg); |
353 | rcu_read_unlock(); | 353 | rcu_read_unlock(); |
354 | return &blkg->rl; | 354 | return &blkg->rl; |
355 | root_rl: | 355 | root_rl: |
356 | rcu_read_unlock(); | 356 | rcu_read_unlock(); |
357 | return &q->root_rl; | 357 | return &q->root_rl; |
358 | } | 358 | } |
359 | 359 | ||
360 | /** | 360 | /** |
361 | * blk_put_rl - put request_list | 361 | * blk_put_rl - put request_list |
362 | * @rl: request_list to put | 362 | * @rl: request_list to put |
363 | * | 363 | * |
364 | * Put the reference acquired by blk_get_rl(). Should be called under | 364 | * Put the reference acquired by blk_get_rl(). Should be called under |
365 | * queue_lock. | 365 | * queue_lock. |
366 | */ | 366 | */ |
367 | static inline void blk_put_rl(struct request_list *rl) | 367 | static inline void blk_put_rl(struct request_list *rl) |
368 | { | 368 | { |
369 | /* root_rl may not have blkg set */ | 369 | /* root_rl may not have blkg set */ |
370 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | 370 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) |
371 | blkg_put(rl->blkg); | 371 | blkg_put(rl->blkg); |
372 | } | 372 | } |
373 | 373 | ||
374 | /** | 374 | /** |
375 | * blk_rq_set_rl - associate a request with a request_list | 375 | * blk_rq_set_rl - associate a request with a request_list |
376 | * @rq: request of interest | 376 | * @rq: request of interest |
377 | * @rl: target request_list | 377 | * @rl: target request_list |
378 | * | 378 | * |
379 | * Associate @rq with @rl so that accounting and freeing can know the | 379 | * Associate @rq with @rl so that accounting and freeing can know the |
380 | * request_list @rq came from. | 380 | * request_list @rq came from. |
381 | */ | 381 | */ |
382 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | 382 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) |
383 | { | 383 | { |
384 | rq->rl = rl; | 384 | rq->rl = rl; |
385 | } | 385 | } |
386 | 386 | ||
387 | /** | 387 | /** |
388 | * blk_rq_rl - return the request_list a request came from | 388 | * blk_rq_rl - return the request_list a request came from |
389 | * @rq: request of interest | 389 | * @rq: request of interest |
390 | * | 390 | * |
391 | * Return the request_list @rq is allocated from. | 391 | * Return the request_list @rq is allocated from. |
392 | */ | 392 | */ |
393 | static inline struct request_list *blk_rq_rl(struct request *rq) | 393 | static inline struct request_list *blk_rq_rl(struct request *rq) |
394 | { | 394 | { |
395 | return rq->rl; | 395 | return rq->rl; |
396 | } | 396 | } |
397 | 397 | ||
398 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | 398 | struct request_list *__blk_queue_next_rl(struct request_list *rl, |
399 | struct request_queue *q); | 399 | struct request_queue *q); |
400 | /** | 400 | /** |
401 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | 401 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue |
402 | * | 402 | * |
403 | * Should be used under queue_lock. | 403 | * Should be used under queue_lock. |
404 | */ | 404 | */ |
405 | #define blk_queue_for_each_rl(rl, q) \ | 405 | #define blk_queue_for_each_rl(rl, q) \ |
406 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | 406 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) |
407 | 407 | ||
408 | /** | 408 | /** |
409 | * blkg_stat_add - add a value to a blkg_stat | 409 | * blkg_stat_add - add a value to a blkg_stat |
410 | * @stat: target blkg_stat | 410 | * @stat: target blkg_stat |
411 | * @val: value to add | 411 | * @val: value to add |
412 | * | 412 | * |
413 | * Add @val to @stat. The caller is responsible for synchronizing calls to | 413 | * Add @val to @stat. The caller is responsible for synchronizing calls to |
414 | * this function. | 414 | * this function. |
415 | */ | 415 | */ |
416 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) | 416 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) |
417 | { | 417 | { |
418 | u64_stats_update_begin(&stat->syncp); | 418 | u64_stats_update_begin(&stat->syncp); |
419 | stat->cnt += val; | 419 | stat->cnt += val; |
420 | u64_stats_update_end(&stat->syncp); | 420 | u64_stats_update_end(&stat->syncp); |
421 | } | 421 | } |
422 | 422 | ||
423 | /** | 423 | /** |
424 | * blkg_stat_read - read the current value of a blkg_stat | 424 | * blkg_stat_read - read the current value of a blkg_stat |
425 | * @stat: blkg_stat to read | 425 | * @stat: blkg_stat to read |
426 | * | 426 | * |
427 | * Read the current value of @stat. This function can be called without | 427 | * Read the current value of @stat. This function can be called without |
428 | * synchronization and takes care of u64 atomicity. | 428 | * synchronization and takes care of u64 atomicity. |
429 | */ | 429 | */ |
430 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) | 430 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) |
431 | { | 431 | { |
432 | unsigned int start; | 432 | unsigned int start; |
433 | uint64_t v; | 433 | uint64_t v; |
434 | 434 | ||
435 | do { | 435 | do { |
436 | start = u64_stats_fetch_begin(&stat->syncp); | 436 | start = u64_stats_fetch_begin(&stat->syncp); |
437 | v = stat->cnt; | 437 | v = stat->cnt; |
438 | } while (u64_stats_fetch_retry(&stat->syncp, start)); | 438 | } while (u64_stats_fetch_retry(&stat->syncp, start)); |
439 | 439 | ||
440 | return v; | 440 | return v; |
441 | } | 441 | } |
442 | 442 | ||
443 | /** | 443 | /** |
444 | * blkg_stat_reset - reset a blkg_stat | 444 | * blkg_stat_reset - reset a blkg_stat |
445 | * @stat: blkg_stat to reset | 445 | * @stat: blkg_stat to reset |
446 | */ | 446 | */ |
447 | static inline void blkg_stat_reset(struct blkg_stat *stat) | 447 | static inline void blkg_stat_reset(struct blkg_stat *stat) |
448 | { | 448 | { |
449 | stat->cnt = 0; | 449 | stat->cnt = 0; |
450 | } | 450 | } |
451 | 451 | ||
452 | /** | 452 | /** |
453 | * blkg_stat_merge - merge a blkg_stat into another | 453 | * blkg_stat_merge - merge a blkg_stat into another |
454 | * @to: the destination blkg_stat | 454 | * @to: the destination blkg_stat |
455 | * @from: the source | 455 | * @from: the source |
456 | * | 456 | * |
457 | * Add @from's count to @to. | 457 | * Add @from's count to @to. |
458 | */ | 458 | */ |
459 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) | 459 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) |
460 | { | 460 | { |
461 | blkg_stat_add(to, blkg_stat_read(from)); | 461 | blkg_stat_add(to, blkg_stat_read(from)); |
462 | } | 462 | } |
463 | 463 | ||
464 | /** | 464 | /** |
465 | * blkg_rwstat_add - add a value to a blkg_rwstat | 465 | * blkg_rwstat_add - add a value to a blkg_rwstat |
466 | * @rwstat: target blkg_rwstat | 466 | * @rwstat: target blkg_rwstat |
467 | * @rw: mask of REQ_{WRITE|SYNC} | 467 | * @rw: mask of REQ_{WRITE|SYNC} |
468 | * @val: value to add | 468 | * @val: value to add |
469 | * | 469 | * |
470 | * Add @val to @rwstat. The counters are chosen according to @rw. The | 470 | * Add @val to @rwstat. The counters are chosen according to @rw. The |
471 | * caller is responsible for synchronizing calls to this function. | 471 | * caller is responsible for synchronizing calls to this function. |
472 | */ | 472 | */ |
473 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, | 473 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, |
474 | int rw, uint64_t val) | 474 | int rw, uint64_t val) |
475 | { | 475 | { |
476 | u64_stats_update_begin(&rwstat->syncp); | 476 | u64_stats_update_begin(&rwstat->syncp); |
477 | 477 | ||
478 | if (rw & REQ_WRITE) | 478 | if (rw & REQ_WRITE) |
479 | rwstat->cnt[BLKG_RWSTAT_WRITE] += val; | 479 | rwstat->cnt[BLKG_RWSTAT_WRITE] += val; |
480 | else | 480 | else |
481 | rwstat->cnt[BLKG_RWSTAT_READ] += val; | 481 | rwstat->cnt[BLKG_RWSTAT_READ] += val; |
482 | if (rw & REQ_SYNC) | 482 | if (rw & REQ_SYNC) |
483 | rwstat->cnt[BLKG_RWSTAT_SYNC] += val; | 483 | rwstat->cnt[BLKG_RWSTAT_SYNC] += val; |
484 | else | 484 | else |
485 | rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; | 485 | rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; |
486 | 486 | ||
487 | u64_stats_update_end(&rwstat->syncp); | 487 | u64_stats_update_end(&rwstat->syncp); |
488 | } | 488 | } |
489 | 489 | ||
490 | /** | 490 | /** |
491 | * blkg_rwstat_read - read the current values of a blkg_rwstat | 491 | * blkg_rwstat_read - read the current values of a blkg_rwstat |
492 | * @rwstat: blkg_rwstat to read | 492 | * @rwstat: blkg_rwstat to read |
493 | * | 493 | * |
494 | * Read the current snapshot of @rwstat and return it as the return value. | 494 | * Read the current snapshot of @rwstat and return it as the return value. |
495 | * This function can be called without synchronization and takes care of | 495 | * This function can be called without synchronization and takes care of |
496 | * u64 atomicity. | 496 | * u64 atomicity. |
497 | */ | 497 | */ |
498 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) | 498 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) |
499 | { | 499 | { |
500 | unsigned int start; | 500 | unsigned int start; |
501 | struct blkg_rwstat tmp; | 501 | struct blkg_rwstat tmp; |
502 | 502 | ||
503 | do { | 503 | do { |
504 | start = u64_stats_fetch_begin(&rwstat->syncp); | 504 | start = u64_stats_fetch_begin(&rwstat->syncp); |
505 | tmp = *rwstat; | 505 | tmp = *rwstat; |
506 | } while (u64_stats_fetch_retry(&rwstat->syncp, start)); | 506 | } while (u64_stats_fetch_retry(&rwstat->syncp, start)); |
507 | 507 | ||
508 | return tmp; | 508 | return tmp; |
509 | } | 509 | } |
510 | 510 | ||
511 | /** | 511 | /** |
512 | * blkg_rwstat_total - read the total count of a blkg_rwstat | 512 | * blkg_rwstat_total - read the total count of a blkg_rwstat |
513 | * @rwstat: blkg_rwstat to read | 513 | * @rwstat: blkg_rwstat to read |
514 | * | 514 | * |
515 | * Return the total count of @rwstat regardless of the IO direction. This | 515 | * Return the total count of @rwstat regardless of the IO direction. This |
516 | * function can be called without synchronization and takes care of u64 | 516 | * function can be called without synchronization and takes care of u64 |
517 | * atomicity. | 517 | * atomicity. |
518 | */ | 518 | */ |
519 | static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) | 519 | static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) |
520 | { | 520 | { |
521 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); | 521 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); |
522 | 522 | ||
523 | return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; | 523 | return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; |
524 | } | 524 | } |
525 | 525 | ||
526 | /** | 526 | /** |
527 | * blkg_rwstat_reset - reset a blkg_rwstat | 527 | * blkg_rwstat_reset - reset a blkg_rwstat |
528 | * @rwstat: blkg_rwstat to reset | 528 | * @rwstat: blkg_rwstat to reset |
529 | */ | 529 | */ |
530 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | 530 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) |
531 | { | 531 | { |
532 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); | 532 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); |
533 | } | 533 | } |
534 | 534 | ||
535 | /** | 535 | /** |
536 | * blkg_rwstat_merge - merge a blkg_rwstat into another | 536 | * blkg_rwstat_merge - merge a blkg_rwstat into another |
537 | * @to: the destination blkg_rwstat | 537 | * @to: the destination blkg_rwstat |
538 | * @from: the source | 538 | * @from: the source |
539 | * | 539 | * |
540 | * Add @from's counts to @to. | 540 | * Add @from's counts to @to. |
541 | */ | 541 | */ |
542 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, | 542 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, |
543 | struct blkg_rwstat *from) | 543 | struct blkg_rwstat *from) |
544 | { | 544 | { |
545 | struct blkg_rwstat v = blkg_rwstat_read(from); | 545 | struct blkg_rwstat v = blkg_rwstat_read(from); |
546 | int i; | 546 | int i; |
547 | 547 | ||
548 | u64_stats_update_begin(&to->syncp); | 548 | u64_stats_update_begin(&to->syncp); |
549 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 549 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
550 | to->cnt[i] += v.cnt[i]; | 550 | to->cnt[i] += v.cnt[i]; |
551 | u64_stats_update_end(&to->syncp); | 551 | u64_stats_update_end(&to->syncp); |
552 | } | 552 | } |
553 | 553 | ||
554 | #else /* CONFIG_BLK_CGROUP */ | 554 | #else /* CONFIG_BLK_CGROUP */ |
555 | 555 | ||
556 | struct cgroup; | 556 | struct cgroup; |
557 | struct blkcg; | 557 | struct blkcg; |
558 | 558 | ||
559 | struct blkg_policy_data { | 559 | struct blkg_policy_data { |
560 | }; | 560 | }; |
561 | 561 | ||
562 | struct blkcg_gq { | 562 | struct blkcg_gq { |
563 | }; | 563 | }; |
564 | 564 | ||
565 | struct blkcg_policy { | 565 | struct blkcg_policy { |
566 | }; | 566 | }; |
567 | 567 | ||
568 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } | 568 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } |
569 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } | 569 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } |
570 | static inline void blkcg_drain_queue(struct request_queue *q) { } | 570 | static inline void blkcg_drain_queue(struct request_queue *q) { } |
571 | static inline void blkcg_exit_queue(struct request_queue *q) { } | 571 | static inline void blkcg_exit_queue(struct request_queue *q) { } |
572 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } | 572 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } |
573 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } | 573 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } |
574 | static inline int blkcg_activate_policy(struct request_queue *q, | 574 | static inline int blkcg_activate_policy(struct request_queue *q, |
575 | const struct blkcg_policy *pol) { return 0; } | 575 | const struct blkcg_policy *pol) { return 0; } |
576 | static inline void blkcg_deactivate_policy(struct request_queue *q, | 576 | static inline void blkcg_deactivate_policy(struct request_queue *q, |
577 | const struct blkcg_policy *pol) { } | 577 | const struct blkcg_policy *pol) { } |
578 | 578 | ||
579 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } | 579 | static inline struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) { return NULL; } |
580 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | 580 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } |
581 | 581 | ||
582 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 582 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
583 | struct blkcg_policy *pol) { return NULL; } | 583 | struct blkcg_policy *pol) { return NULL; } |
584 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | 584 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
585 | static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | 585 | static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } |
586 | static inline void blkg_get(struct blkcg_gq *blkg) { } | 586 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
587 | static inline void blkg_put(struct blkcg_gq *blkg) { } | 587 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
588 | 588 | ||
589 | static inline struct request_list *blk_get_rl(struct request_queue *q, | 589 | static inline struct request_list *blk_get_rl(struct request_queue *q, |
590 | struct bio *bio) { return &q->root_rl; } | 590 | struct bio *bio) { return &q->root_rl; } |
591 | static inline void blk_put_rl(struct request_list *rl) { } | 591 | static inline void blk_put_rl(struct request_list *rl) { } |
592 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | 592 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } |
593 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | 593 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } |
594 | 594 | ||
595 | #define blk_queue_for_each_rl(rl, q) \ | 595 | #define blk_queue_for_each_rl(rl, q) \ |
596 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | 596 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) |
597 | 597 | ||
598 | #endif /* CONFIG_BLK_CGROUP */ | 598 | #endif /* CONFIG_BLK_CGROUP */ |
599 | #endif /* _BLK_CGROUP_H */ | 599 | #endif /* _BLK_CGROUP_H */ |
600 | 600 |
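
Finally, a short usage sketch of the reference-counting helpers diffed above; the caller function name is hypothetical. Both blkg_get() and blkg_put() assert that queue_lock is held, and once the last reference is dropped blkg_put() queues __blkg_release_rcu() through call_rcu(), as the right-hand column of the blkg_put() hunk shows.

/* Hypothetical caller -- example_touch_blkg() is made up for illustration. */
static void example_touch_blkg(struct request_queue *q, struct blkcg_gq *blkg)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);

	blkg_get(blkg);		/* pin the blkg; requires queue_lock held */

	/* ... read or update per-blkg state here ... */

	blkg_put(blkg);		/* last put schedules __blkg_release_rcu() via call_rcu() */

	spin_unlock_irqrestore(q->queue_lock, flags);
}
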