block/blk-mq-tag.c
/*
 * Tag allocation using scalable bitmaps. Uses active queue tracking to support
 * fairer distribution of tags between multiple submitters when a shared tag map
 * is used.
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/module.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
{
	if (!tags)
		return true;

	return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
}
/*
 * If a previously inactive queue goes active, bump the active user count.
 * We need to do this before trying to allocate a driver tag, so that even
 * if the first allocation attempt fails, the other shared-tag users can
 * still reserve budget for it.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		atomic_inc(&hctx->tags->active_queues);

	return true;
}
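/*
 * The plain test_bit() ahead of test_and_set_bit() above follows a common
 * kernel pattern: it avoids the atomic read-modify-write (and the resulting
 * cacheline write) in the common case where the bit is already set.
 */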
/*
 * Wake up all waiters potentially sleeping on tags.
 */
void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
{
	sbitmap_queue_wake_all(&tags->bitmap_tags);
	if (include_reserve)
		sbitmap_queue_wake_all(&tags->breserved_tags);
}
/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return;

	atomic_dec(&tags->active_queues);

	blk_mq_tag_wakeup_all(tags, false);
}
/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return true;
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->sb.depth == 1)
		return true;

	users = atomic_read(&hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return atomic_read(&hctx->nr_active) < depth;
}
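/*
 * Worked example of the limit above: with a shared bitmap depth of 128
 * and 3 active queues, each queue may have at most
 * max((128 + 3 - 1) / 3, 4U) = max(43, 4) = 43 requests in flight. The
 * ceiling division rounds each per-queue share up so the shares cover
 * the whole depth, and the floor of 4 guarantees forward progress on
 * very shallow tag maps.
 */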
static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
			    struct sbitmap_queue *bt)
{
	if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
	    !hctx_may_queue(data->hctx, bt))
		return -1;
	if (data->shallow_depth)
		return __sbitmap_queue_get_shallow(bt, data->shallow_depth);
	else
		return __sbitmap_queue_get(bt);
}
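/*
 * data->shallow_depth is set by I/O schedulers (Kyber and BFQ, at the
 * time of writing) to cap how deeply certain request classes may dip
 * into the bitmap, keeping tags in reserve for latency-sensitive I/O.
 */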
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct sbitmap_queue *bt;
	struct sbq_wait_state *ws;
	DEFINE_WAIT(wait);
	unsigned int tag_offset;
	bool drop_ctx;
	int tag;

	if (data->flags & BLK_MQ_REQ_RESERVED) {
		if (unlikely(!tags->nr_reserved_tags)) {
			WARN_ON_ONCE(1);
			return BLK_MQ_TAG_FAIL;
		}
		bt = &tags->breserved_tags;
		tag_offset = 0;
	} else {
		bt = &tags->bitmap_tags;
		tag_offset = tags->nr_reserved_tags;
	}
	tag = __blk_mq_get_tag(data, bt);
	if (tag != -1)
		goto found_tag;

	if (data->flags & BLK_MQ_REQ_NOWAIT)
		return BLK_MQ_TAG_FAIL;

	ws = bt_wait_ptr(bt, data->hctx);
	drop_ctx = data->ctx == NULL;
	do {
		struct sbitmap_queue *bt_prev;

		/*
		 * We're out of tags on this hardware queue, kick any
		 * pending IO submits before going to sleep waiting for
		 * some to complete.
		 */
		blk_mq_run_hw_queue(data->hctx, false);

		/*
		 * Retry tag allocation after running the hardware queue,
		 * as running the queue may also have found completions.
		 */
		tag = __blk_mq_get_tag(data, bt);
		if (tag != -1)
			break;

		prepare_to_wait_exclusive(&ws->wait, &wait,
					  TASK_UNINTERRUPTIBLE);

		tag = __blk_mq_get_tag(data, bt);
		if (tag != -1)
			break;

		if (data->ctx)
			blk_mq_put_ctx(data->ctx);

		bt_prev = bt;
		io_schedule();

		data->ctx = blk_mq_get_ctx(data->q);
		data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
		tags = blk_mq_tags_from_data(data);
		if (data->flags & BLK_MQ_REQ_RESERVED)
			bt = &tags->breserved_tags;
		else
			bt = &tags->bitmap_tags;

		finish_wait(&ws->wait, &wait);

		/*
		 * If the destination hw queue changed, issue a spurious
		 * wake-up on the previous queue to compensate for the
		 * missed wakeup, so other allocations on the previous
		 * queue won't be starved.
		 */
		if (bt != bt_prev)
			sbitmap_queue_wake_up(bt_prev);

		ws = bt_wait_ptr(bt, data->hctx);
	} while (1);
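	/*
	 * Note on the loop above: the second __blk_mq_get_tag() attempt,
	 * made after prepare_to_wait_exclusive(), closes the race where a
	 * tag is freed between a failed allocation and the task queueing
	 * itself on the waitqueue; without it, that wakeup would be missed
	 * and the task could sleep indefinitely.
	 */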
	if (drop_ctx && data->ctx)
		blk_mq_put_ctx(data->ctx);

	finish_wait(&ws->wait, &wait);

found_tag:
	return tag + tag_offset;
}
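/*
 * Tag numbering note: reserved tags occupy [0, nr_reserved_tags) and
 * normal tags occupy [nr_reserved_tags, nr_tags) in the combined tag
 * space, which is why the normal path above adds tag_offset before
 * returning and blk_mq_put_tag() below subtracts it again.
 */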
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
		    struct blk_mq_ctx *ctx, unsigned int tag)
{
	if (!blk_mq_tag_is_reserved(tags, tag)) {
		const int real_tag = tag - tags->nr_reserved_tags;

		BUG_ON(real_tag >= tags->nr_tags);
		sbitmap_queue_clear(&tags->bitmap_tags, real_tag, ctx->cpu);
	} else {
		BUG_ON(tag >= tags->nr_reserved_tags);
		sbitmap_queue_clear(&tags->breserved_tags, tag, ctx->cpu);
	}
}
struct bt_iter_data {
	struct blk_mq_hw_ctx *hctx;
	busy_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_iter_data *iter_data = data;
	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
	struct blk_mq_tags *tags = hctx->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;
	rq = tags->rqs[bitnr];

	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	if (rq && rq->q == hctx->queue)
		iter_data->fn(hctx, rq, iter_data->data, reserved);
	return true;
}
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
			busy_iter_fn *fn, void *data, bool reserved)
{
	struct bt_iter_data iter_data = {
		.hctx = hctx,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
}
struct bt_tags_iter_data {
	struct blk_mq_tags *tags;
	busy_tag_iter_fn *fn;
	void *data;
	bool reserved;
};

static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
	struct bt_tags_iter_data *iter_data = data;
	struct blk_mq_tags *tags = iter_data->tags;
	bool reserved = iter_data->reserved;
	struct request *rq;

	if (!reserved)
		bitnr += tags->nr_reserved_tags;

	/*
	 * We can hit rq == NULL here, because the tagging functions
	 * test and set the bit before assigning ->rqs[].
	 */
	rq = tags->rqs[bitnr];
	if (rq && blk_mq_request_started(rq))
		iter_data->fn(rq, iter_data->data, reserved);

	return true;
}

static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
			     busy_tag_iter_fn *fn, void *data, bool reserved)
{
	struct bt_tags_iter_data iter_data = {
		.tags = tags,
		.fn = fn,
		.data = data,
		.reserved = reserved,
	};

	if (tags->rqs)
		sbitmap_for_each_set(&bt->sb, bt_tags_iter, &iter_data);
}
static void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
		busy_tag_iter_fn *fn, void *priv)
{
	if (tags->nr_reserved_tags)
		bt_tags_for_each(tags, &tags->breserved_tags, fn, priv, true);
	bt_tags_for_each(tags, &tags->bitmap_tags, fn, priv, false);
}
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
		busy_tag_iter_fn *fn, void *priv)
{
	int i;

	for (i = 0; i < tagset->nr_hw_queues; i++) {
		if (tagset->tags && tagset->tags[i])
			blk_mq_all_tag_busy_iter(tagset->tags[i], fn, priv);
	}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
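/*
 * Sketch of a caller (hypothetical driver code; count_inflight and
 * driver->tag_set are made-up names). A driver counting its started,
 * in-flight requests could write:
 *
 *	static void count_inflight(struct request *rq, void *data,
 *				   bool reserved)
 *	{
 *		(*(unsigned int *)data)++;
 *	}
 *
 *	unsigned int inflight = 0;
 *
 *	blk_mq_tagset_busy_iter(&driver->tag_set, count_inflight, &inflight);
 *
 * The callback signature is busy_tag_iter_fn, as used above.
 */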
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
		void *priv)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	/*
	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and
	 * queue_hw_ctx after freezing the queue, so we use q_usage_counter
	 * to avoid racing with it.
	 */
	if (!percpu_ref_tryget(&q->q_usage_counter))
		return;

	queue_for_each_hw_ctx(q, hctx, i) {
		struct blk_mq_tags *tags = hctx->tags;

		/*
		 * If no software queues are currently mapped to this
		 * hardware queue, there's nothing to check
		 */
		if (!blk_mq_hw_queue_mapped(hctx))
			continue;

		if (tags->nr_reserved_tags)
			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
	}
	blk_queue_exit(q);
}
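/*
 * Note the split between the two iterators: blk_mq_queue_tag_busy_iter()
 * walks a single request_queue's hardware contexts (its callbacks receive
 * the hctx), while blk_mq_tagset_busy_iter() walks every tag map in a tag
 * set and only visits requests that have actually been started.
 */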
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
		    bool round_robin, int node)
{
	return sbitmap_queue_init_node(bt, depth, -1, round_robin, GFP_KERNEL,
				       node);
}

static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
						   int node, int alloc_policy)
{
	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
	bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;

	if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
		goto free_tags;
	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags,
		     round_robin, node))
		goto free_bitmap_tags;

	return tags;
free_bitmap_tags:
	sbitmap_queue_free(&tags->bitmap_tags);
free_tags:
	kfree(tags);
	return NULL;
}
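/*
 * The -1 passed as the shift argument to sbitmap_queue_init_node() in
 * bt_alloc() lets the sbitmap code choose its default bits-per-word
 * split based on the requested depth.
 */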
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
				     unsigned int reserved_tags,
				     int node, int alloc_policy)
{
	struct blk_mq_tags *tags;

	if (total_tags > BLK_MQ_TAG_MAX) {
		pr_err("blk-mq: tag depth too large\n");
		return NULL;
	}

	tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node);
	if (!tags)
		return NULL;

	tags->nr_tags = total_tags;
	tags->nr_reserved_tags = reserved_tags;

	return blk_mq_init_bitmap_tags(tags, node, alloc_policy);
}

void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	sbitmap_queue_free(&tags->bitmap_tags);
	sbitmap_queue_free(&tags->breserved_tags);
	kfree(tags);
}
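/*
 * Illustrative pairing (the values are made up): a driver wanting 64 tags
 * with 2 reserved for internal commands would do
 *
 *	tags = blk_mq_init_tags(64, 2, NUMA_NO_NODE, BLK_TAG_ALLOC_FIFO);
 *	if (!tags)
 *		return -ENOMEM;
 *	...
 *	blk_mq_free_tags(tags);
 *
 * BLK_TAG_ALLOC_FIFO is the default allocation policy; BLK_TAG_ALLOC_RR
 * selects the round-robin mode checked in blk_mq_init_bitmap_tags().
 */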
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
			    struct blk_mq_tags **tagsptr, unsigned int tdepth,
			    bool can_grow)
{
	struct blk_mq_tags *tags = *tagsptr;

	if (tdepth <= tags->nr_reserved_tags)
		return -EINVAL;

	/*
	 * If we are allowed to grow beyond the original size, allocate
	 * a new set of tags before freeing the old one.
	 */
	if (tdepth > tags->nr_tags) {
		struct blk_mq_tag_set *set = hctx->queue->tag_set;
		struct blk_mq_tags *new;
		int ret;

		if (!can_grow)
			return -EINVAL;

		/*
		 * We need some sort of upper limit, set it high enough that
		 * no valid use cases should require more.
		 */
		if (tdepth > 16 * BLKDEV_MAX_RQ)
			return -EINVAL;

		new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth,
					  tags->nr_reserved_tags);
		if (!new)
			return -ENOMEM;
		ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
		if (ret) {
			blk_mq_free_rq_map(new);
			return -ENOMEM;
		}

		blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
		blk_mq_free_rq_map(*tagsptr);
		*tagsptr = new;
	} else {
		/*
		 * Don't need (or can't) update reserved tags here, they
		 * remain static and should never need resizing.
		 */
		sbitmap_queue_resize(&tags->bitmap_tags,
				     tdepth - tags->nr_reserved_tags);
	}

	return 0;
}
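/*
 * Sketch of the two paths above, with made-up numbers: shrinking a map
 * from 256 to 128 tags only resizes the bitmap in place (the request
 * structures stay allocated), while growing from 256 to 512 allocates a
 * brand-new rq map and frees the old one, since the requests themselves
 * must be reallocated at the larger depth.
 */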
/**
 * blk_mq_unique_tag() - return a tag that is unique queue-wide
 * @rq: request for which to compute a unique tag
 *
 * The tag field in struct request is unique per hardware queue but not over
 * all hardware queues. Hence this function, which returns a tag with the
 * hardware context index in the upper bits and the per-hardware-queue tag in
 * the lower bits.
 *
 * Note: When called for a request that is queued on a non-multiqueue request
 * queue, the hardware context index is set to zero.
 */
u32 blk_mq_unique_tag(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx;
	int hwq = 0;

	if (q->mq_ops) {
		hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
		hwq = hctx->queue_num;
	}

	return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
		(rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
}
EXPORT_SYMBOL(blk_mq_unique_tag);
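/*
 * Decoding example (the helpers below live in include/linux/blk-mq.h):
 *
 *	u32 unique = blk_mq_unique_tag(rq);
 *	u16 hwq = blk_mq_unique_tag_to_hwq(unique);
 *	u16 tag = blk_mq_unique_tag_to_tag(unique);
 *
 * This lets a caller, such as a SCSI LLD, carry a single 32-bit value and
 * later recover the (hardware queue, tag) pair from it.
 */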