block/blk-mq-sched.c
  /*
   * blk-mq scheduling framework
   *
   * Copyright (C) 2016 Jens Axboe
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/blk-mq.h>
  
  #include <trace/events/block.h>
  
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-debugfs.h"
  #include "blk-mq-sched.h"
  #include "blk-mq-tag.h"
  #include "blk-wbt.h"
  
  void blk_mq_sched_free_hctx_data(struct request_queue *q,
  				 void (*exit)(struct blk_mq_hw_ctx *))
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (exit && hctx->sched_data)
  			exit(hctx);
  		kfree(hctx->sched_data);
  		hctx->sched_data = NULL;
  	}
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
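
  /*
   * Look up the io_cq for this request's io_context on the queue, creating
   * it if it does not exist yet, and attach it to the request for the
   * elevator's use.
   */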
  void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
  {
  	struct request_queue *q = rq->q;
  	struct io_context *ioc = rq_ioc(bio);
  	struct io_cq *icq;
  
  	spin_lock_irq(q->queue_lock);
  	icq = ioc_lookup_icq(ioc, q);
  	spin_unlock_irq(q->queue_lock);
  
  	if (!icq) {
  		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
  		if (!icq)
  			return;
  	}
  	get_io_context(icq->ioc);
  	rq->elv.icq = icq;
  }
  /*
   * Mark a hardware queue as needing a restart. For shared queues, maintain
   * a count of how many hardware queues are marked for restart.
   */
  static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
  {
  	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  		return;
  
  	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
  		struct request_queue *q = hctx->queue;
  
  		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  			atomic_inc(&q->shared_hctx_restart);
  	} else
  		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  }
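
  /*
   * If the hardware queue was marked for restart, clear the marker (and,
   * for shared tag sets, drop the shared restart count) and re-run the
   * queue. Returns false if the queue was not marked for restart.
   */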
  static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
  {
  	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  		return false;

  	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
  		struct request_queue *q = hctx->queue;
  
  		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  			atomic_dec(&q->shared_hctx_restart);
  	} else
  		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

  	return blk_mq_run_hw_queue(hctx, true);
  }
  /*
   * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
   * its queue by itself in its completion handler, so we don't need to
   * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
   */
  static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	struct elevator_queue *e = q->elevator;
  	LIST_HEAD(rq_list);
  
  	do {
  		struct request *rq;

  		if (e->type->ops.mq.has_work &&
  				!e->type->ops.mq.has_work(hctx))
  			break;

  		if (!blk_mq_get_dispatch_budget(hctx))
  			break;
  
  		rq = e->type->ops.mq.dispatch_request(hctx);
  		if (!rq) {
  			blk_mq_put_dispatch_budget(hctx);
  			break;
  		}
  
  		/*
  		 * Now this rq owns the budget which has to be released
  		 * if this rq won't be queued to driver via .queue_rq()
  		 * in blk_mq_dispatch_rq_list().
  		 */
  		list_add(&rq->queuelist, &rq_list);
  	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
  }
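
  /*
   * Return the software queue following @ctx on @hctx, wrapping around to
   * the first one. Used for round-robin dispatch from the software queues.
   */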
  static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
  					  struct blk_mq_ctx *ctx)
  {
  	unsigned idx = ctx->index_hw;
  
  	if (++idx == hctx->nr_ctx)
  		idx = 0;
  
  	return hctx->ctxs[idx];
  }
  /*
   * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
   * its queue by itself in its completion handler, so we don't need to
   * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
   */
  static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	LIST_HEAD(rq_list);
  	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
  
  	do {
  		struct request *rq;
  
  		if (!sbitmap_any_bit_set(&hctx->ctx_map))
  			break;
  		if (!blk_mq_get_dispatch_budget(hctx))
  			break;
  
  		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
  		if (!rq) {
  			blk_mq_put_dispatch_budget(hctx);
  			break;
  		}
  
  		/*
  		 * Now this rq owns the budget which has to be released
  		 * if this rq won't be queued to driver via .queue_rq()
  		 * in blk_mq_dispatch_rq_list().
  		 */
  		list_add(&rq->queuelist, &rq_list);
  
  		/* round robin for fair dispatch */
  		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
  
  	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
  
  	WRITE_ONCE(hctx->dispatch_from, ctx);
  }
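
  /*
   * Dispatch requests for a hardware queue: previously requeued requests
   * on hctx->dispatch are flushed first, then requests are pulled from the
   * I/O scheduler (or directly from the software queues if no scheduler is
   * attached) and passed to the driver.
   */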
  void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	struct elevator_queue *e = q->elevator;
  	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
  	LIST_HEAD(rq_list);
  	/* RCU or SRCU read lock is needed before checking quiesced flag */
  	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
  		return;
  
  	hctx->run++;
  
  	/*
  	 * If we have previous entries on our dispatch list, grab them first for
  	 * more fair dispatch.
  	 */
  	if (!list_empty_careful(&hctx->dispatch)) {
  		spin_lock(&hctx->lock);
  		if (!list_empty(&hctx->dispatch))
  			list_splice_init(&hctx->dispatch, &rq_list);
  		spin_unlock(&hctx->lock);
  	}
  
  	/*
  	 * Only ask the scheduler for requests, if we didn't have residual
  	 * requests from the dispatch list. This is to avoid the case where
  	 * we only ever dispatch a fraction of the requests available because
  	 * of low device queue depth. Once we pull requests out of the IO
  	 * scheduler, we can no longer merge or sort them. So it's best to
  	 * leave them there for as long as we can. Mark the hw queue as
  	 * needing a restart in that case.
  	 *
  	 * We want to dispatch from the scheduler if there was nothing
  	 * on the dispatch list or we were able to dispatch from the
  	 * dispatch list.
  	 */
  	if (!list_empty(&rq_list)) {
  		blk_mq_sched_mark_restart_hctx(hctx);
  		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
  			if (has_sched_dispatch)
  				blk_mq_do_dispatch_sched(hctx);
  			else
  				blk_mq_do_dispatch_ctx(hctx);
  		}
  	} else if (has_sched_dispatch) {
  		blk_mq_do_dispatch_sched(hctx);
  	} else if (q->mq_ops->get_budget) {
  		/*
  		 * If we need to get a budget before queueing a request, we
  		 * dequeue requests one by one from the sw queue to avoid
  		 * messing up I/O merging when dispatch runs out of resources.
  		 *
  		 * TODO: get more budget, and dequeue more requests at a
  		 * time.
  		 */
  		blk_mq_do_dispatch_ctx(hctx);
  	} else {
  		blk_mq_flush_busy_ctxs(hctx, &rq_list);
  		blk_mq_dispatch_rq_list(q, &rq_list, false);
  	}
  }
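
  /*
   * Try to merge @bio into an existing request on @q. On a successful back
   * or front merge, also try to merge the request with a neighbouring one;
   * if that succeeds, the now-redundant request is returned through
   * @merged_request so the caller can free it.
   */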
  bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
  			    struct request **merged_request)
  {
  	struct request *rq;

  	switch (elv_merge(q, &rq, bio)) {
  	case ELEVATOR_BACK_MERGE:
  		if (!blk_mq_sched_allow_merge(q, rq, bio))
  			return false;
  		if (!bio_attempt_back_merge(q, rq, bio))
  			return false;
  		*merged_request = attempt_back_merge(q, rq);
  		if (!*merged_request)
  			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
  		return true;
  	case ELEVATOR_FRONT_MERGE:
  		if (!blk_mq_sched_allow_merge(q, rq, bio))
  			return false;
  		if (!bio_attempt_front_merge(q, rq, bio))
  			return false;
  		*merged_request = attempt_front_merge(q, rq);
  		if (!*merged_request)
  			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
  		return true;
  	case ELEVATOR_DISCARD_MERGE:
  		return bio_attempt_discard_merge(q, rq, bio);
  	default:
  		return false;
  	}
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
  /*
   * Iterate list of requests and see if we can merge this bio with any
   * of them.
   */
  bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
  			   struct bio *bio)
  {
  	struct request *rq;
  	int checked = 8;
  	list_for_each_entry_reverse(rq, list, queuelist) {
  		bool merged = false;
  
  		if (!checked--)
  			break;
  
  		if (!blk_rq_merge_ok(rq, bio))
  			continue;
  
  		switch (blk_try_merge(rq, bio)) {
  		case ELEVATOR_BACK_MERGE:
  			if (blk_mq_sched_allow_merge(q, rq, bio))
  				merged = bio_attempt_back_merge(q, rq, bio);
  			break;
  		case ELEVATOR_FRONT_MERGE:
  			if (blk_mq_sched_allow_merge(q, rq, bio))
  				merged = bio_attempt_front_merge(q, rq, bio);
  			break;
  		case ELEVATOR_DISCARD_MERGE:
  			merged = bio_attempt_discard_merge(q, rq, bio);
  			break;
  		default:
  			continue;
  		}
  		return merged;
  	}
  
  	return false;
  }
  EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  
  /*
   * Reverse check our software queue for entries that we could potentially
   * merge with. Currently includes a hand-wavy stop count of 8, to not spend
   * too much time checking for merges.
   */
  static bool blk_mq_attempt_merge(struct request_queue *q,
  				 struct blk_mq_ctx *ctx, struct bio *bio)
  {
  	lockdep_assert_held(&ctx->lock);
  
  	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
  		ctx->rq_merged++;
  		return true;
  	}
  
  	return false;
  }
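
  /*
   * Try to merge @bio with a pending request: use the elevator's bio_merge
   * hook if it provides one, otherwise fall back to the per-software-queue
   * merge when BLK_MQ_F_SHOULD_MERGE is set.
   */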
  bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
  {
  	struct elevator_queue *e = q->elevator;
  	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
  	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
  	bool ret = false;

  	if (e && e->type->ops.mq.bio_merge) {
  		blk_mq_put_ctx(ctx);
  		return e->type->ops.mq.bio_merge(hctx, bio);
  	}
  	if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
  		/* default per sw-queue merge */
  		spin_lock(&ctx->lock);
  		ret = blk_mq_attempt_merge(q, ctx, bio);
  		spin_unlock(&ctx->lock);
  	}
  
  	blk_mq_put_ctx(ctx);
  	return ret;
  }
  
  bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
  {
  	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
  
  void blk_mq_sched_request_inserted(struct request *rq)
  {
  	trace_block_rq_insert(rq->q, rq);
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
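
  /*
   * Decide whether @rq should bypass the I/O scheduler on insert. Requests
   * that are part of a flush sequence are added to hctx->dispatch directly
   * and true is returned; otherwise the request is marked RQF_SORTED when
   * a scheduler is attached and false is returned.
   */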
  static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
  				       bool has_sched,
  				       struct request *rq)
  {
  	/* dispatch flush rq directly */
  	if (rq->rq_flags & RQF_FLUSH_SEQ) {
  		spin_lock(&hctx->lock);
  		list_add(&rq->queuelist, &hctx->dispatch);
  		spin_unlock(&hctx->lock);
  		return true;
  	}
  	if (has_sched)
  		rq->rq_flags |= RQF_SORTED;

  	return false;
  }

  /**
   * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
   * @pos:    loop cursor.
   * @skip:   the list element that will not be examined. Iteration starts at
   *          @skip->next.
   * @head:   head of the list to examine. This list must have at least one
   *          element, namely @skip.
   * @member: name of the list_head structure within typeof(*pos).
   */
  #define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
  	for ((pos) = (skip);						\
  	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
  			(pos)->member.next, typeof(*pos), member) :	\
  	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
  	     (pos) != (skip); )
  
  /*
   * Called after a driver tag has been freed to check whether a hctx needs to
   * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
   * queues in a round-robin fashion if the tag set of @hctx is shared with other
   * hardware queues.
   */
  void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
  {
  	struct blk_mq_tags *const tags = hctx->tags;
  	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
  	struct request_queue *const queue = hctx->queue, *q;
  	struct blk_mq_hw_ctx *hctx2;
  	unsigned int i, j;
  
  	if (set->flags & BLK_MQ_F_TAG_SHARED) {
  		/*
  		 * If this is 0, then we know that no hardware queues
  		 * have RESTART marked. We're done.
  		 */
  		if (!atomic_read(&queue->shared_hctx_restart))
  			return;
  
  		rcu_read_lock();
  		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
  					   tag_set_list) {
  			queue_for_each_hw_ctx(q, hctx2, i)
  				if (hctx2->tags == tags &&
  				    blk_mq_sched_restart_hctx(hctx2))
  					goto done;
  		}
  		j = hctx->queue_num + 1;
  		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
  			if (j == queue->nr_hw_queues)
  				j = 0;
  			hctx2 = queue->queue_hw_ctx[j];
  			if (hctx2->tags == tags &&
  			    blk_mq_sched_restart_hctx(hctx2))
  				break;
  		}
  done:
  		rcu_read_unlock();
  	} else {
  		blk_mq_sched_restart_hctx(hctx);
  	}
  }
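
  /*
   * Insert a single request: flush requests go to the flush machinery or
   * straight to the dispatch list, everything else goes through the
   * elevator's insert_requests hook (or the software queue if there is no
   * elevator). The hardware queue is then run if @run_queue is set.
   */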
  void blk_mq_sched_insert_request(struct request *rq, bool at_head,
  				 bool run_queue, bool async)
  {
  	struct request_queue *q = rq->q;
  	struct elevator_queue *e = q->elevator;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
  	/* flush rq in flush machinery need to be dispatched directly */
  	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
  		blk_insert_flush(rq);
  		goto run;
  	}
  	WARN_ON(e && (rq->tag != -1));
  	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
  		goto run;
  	if (e && e->type->ops.mq.insert_requests) {
  		LIST_HEAD(list);
  
  		list_add(&rq->queuelist, &list);
  		e->type->ops.mq.insert_requests(hctx, &list, at_head);
  	} else {
  		spin_lock(&ctx->lock);
  		__blk_mq_insert_request(hctx, rq, at_head);
  		spin_unlock(&ctx->lock);
  	}
  run:
  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, async);
  }
  
  void blk_mq_sched_insert_requests(struct request_queue *q,
  				  struct blk_mq_ctx *ctx,
  				  struct list_head *list, bool run_queue_async)
  {
  	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
  	struct elevator_queue *e = hctx->queue->elevator;
  
  	if (e && e->type->ops.mq.insert_requests)
  		e->type->ops.mq.insert_requests(hctx, list, false);
  	else
  		blk_mq_insert_requests(hctx, ctx, list);
  
  	blk_mq_run_hw_queue(hctx, run_queue_async);
  }
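
  /* Free the scheduler requests and sched tag map of one hardware queue. */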
  static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
  				   struct blk_mq_hw_ctx *hctx,
  				   unsigned int hctx_idx)
  {
  	if (hctx->sched_tags) {
  		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
  		blk_mq_free_rq_map(hctx->sched_tags);
  		hctx->sched_tags = NULL;
  	}
  }
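
  /*
   * Allocate a scheduler tag map for @hctx, sized to q->nr_requests, along
   * with the request structures backing it. Cleans up after itself on
   * failure.
   */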
  static int blk_mq_sched_alloc_tags(struct request_queue *q,
  				   struct blk_mq_hw_ctx *hctx,
  				   unsigned int hctx_idx)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	int ret;
  
  	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
  					       set->reserved_tags);
  	if (!hctx->sched_tags)
  		return -ENOMEM;
  
  	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
  	if (ret)
  		blk_mq_sched_free_tags(set, hctx, hctx_idx);
  
  	return ret;
  }
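
  /* Release the scheduler tags of every hardware queue. */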
  static void blk_mq_sched_tags_teardown(struct request_queue *q)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_sched_free_tags(set, hctx, i);
  }
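
  /*
   * Set up scheduler state for a single hardware queue: allocate its
   * scheduler tags, call the elevator's init_hctx hook and register the
   * queue with debugfs. A no-op if no elevator is attached.
   */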
  int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
  			   unsigned int hctx_idx)
  {
  	struct elevator_queue *e = q->elevator;
  	int ret;
  
  	if (!e)
  		return 0;
  	ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
  	if (ret)
  		return ret;
  
  	if (e->type->ops.mq.init_hctx) {
  		ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
  		if (ret) {
  			blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
  			return ret;
  		}
  	}
  	blk_mq_debugfs_register_sched_hctx(q, hctx);
  	return 0;
  }
  
  void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
  			    unsigned int hctx_idx)
  {
  	struct elevator_queue *e = q->elevator;
  
  	if (!e)
  		return;
  	blk_mq_debugfs_unregister_sched_hctx(hctx);
  	if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
  		e->type->ops.mq.exit_hctx(hctx, hctx_idx);
  		hctx->sched_data = NULL;
  	}
  	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
  }
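
  /*
   * Attach elevator @e to @q: pick a per-hw-queue depth, allocate
   * scheduler tags for each hardware queue, call the elevator's init_sched
   * and init_hctx hooks and register everything with debugfs. With a NULL
   * elevator the queue simply uses the full tag set depth.
   */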
  int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct elevator_queue *eq;
  	unsigned int i;
  	int ret;
  
  	if (!e) {
  		q->elevator = NULL;
  		q->nr_requests = q->tag_set->queue_depth;
  		return 0;
  	}
  
  	/*
  	 * Default to twice the smaller of the hw queue depth and 128,
  	 * since we don't split into sync/async like the old code did.
  	 * Additionally, this is a per-hw queue depth.
  	 */
  	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
  				   BLKDEV_MAX_RQ);

  	queue_for_each_hw_ctx(q, hctx, i) {
  		ret = blk_mq_sched_alloc_tags(q, hctx, i);
  		if (ret)
  			goto err;
  	}
  	ret = e->ops.mq.init_sched(q, e);
  	if (ret)
  		goto err;

  	blk_mq_debugfs_register_sched(q);
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (e->ops.mq.init_hctx) {
  			ret = e->ops.mq.init_hctx(hctx, i);
  			if (ret) {
  				eq = q->elevator;
  				blk_mq_exit_sched(q, eq);
  				kobject_put(&eq->kobj);
  				return ret;
  			}
  		}
  		blk_mq_debugfs_register_sched_hctx(q, hctx);
  	}
  	return 0;

  err:
  	blk_mq_sched_tags_teardown(q);
  	q->elevator = NULL;
  	return ret;
  }
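
  /*
   * Detach the elevator from @q: run exit_hctx for each hardware queue,
   * unregister the debugfs entries, call the elevator's exit_sched hook
   * and release the scheduler tags.
   */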
  void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  	queue_for_each_hw_ctx(q, hctx, i) {
  		blk_mq_debugfs_unregister_sched_hctx(hctx);
  		if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
  			e->type->ops.mq.exit_hctx(hctx, i);
  			hctx->sched_data = NULL;
  		}
  	}
  	blk_mq_debugfs_unregister_sched(q);
  	if (e->type->ops.mq.exit_sched)
  		e->type->ops.mq.exit_sched(e);
  	blk_mq_sched_tags_teardown(q);
  	q->elevator = NULL;
  }