block/blk-mq-sched.c
  // SPDX-License-Identifier: GPL-2.0
  /*
   * blk-mq scheduling framework
   *
   * Copyright (C) 2016 Jens Axboe
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/blk-mq.h>
  #include <linux/list_sort.h>
  
  #include <trace/events/block.h>
  
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-debugfs.h"
  #include "blk-mq-sched.h"
  #include "blk-mq-tag.h"
#include "blk-wbt.h"

void blk_mq_sched_assign_ioc(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct io_context *ioc;
	struct io_cq *icq;
  	/*
  	 * May not have an IO context if it's a passthrough request
  	 */
  	ioc = current->io_context;
  	if (!ioc)
  		return;
	spin_lock_irq(&q->queue_lock);
	icq = ioc_lookup_icq(ioc, q);
	spin_unlock_irq(&q->queue_lock);
  
  	if (!icq) {
  		icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
  		if (!icq)
  			return;
  	}
  	get_io_context(icq->ioc);
  	rq->elv.icq = icq;
}

/*
 * Mark a hardware queue as needing a restart. For shared queues, maintain
 * a count of how many hardware queues are marked for restart.
 */
  void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
  {
  	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  		return;
  	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_mark_restart_hctx);

  void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
  {
  	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
  		return;
  	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	/*
	 * Order clearing SCHED_RESTART and list_empty_careful(&hctx->dispatch)
	 * in blk_mq_run_hw_queue(). Its pair is the barrier in
	 * blk_mq_dispatch_rq_list(). Without the pairing, the dispatch code
	 * might not see that SCHED_RESTART was cleared, while a request newly
	 * added to hctx->dispatch is missed by the check in
	 * blk_mq_run_hw_queue().
	 */
  	smp_mb();
  	blk_mq_run_hw_queue(hctx, true);
  }
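
/*
 * list_sort() comparator: order requests by their hardware queue so that
 * requests sharing an hctx end up adjacent and can be dispatched as a batch.
 */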
  static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
  {
  	struct request *rqa = container_of(a, struct request, queuelist);
  	struct request *rqb = container_of(b, struct request, queuelist);
  
  	return rqa->mq_hctx > rqb->mq_hctx;
  }
  
  static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
  {
  	struct blk_mq_hw_ctx *hctx =
  		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
  	struct request *rq;
  	LIST_HEAD(hctx_list);
  	unsigned int count = 0;
  
  	list_for_each_entry(rq, rq_list, queuelist) {
  		if (rq->mq_hctx != hctx) {
  			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
  			goto dispatch;
  		}
  		count++;
  	}
  	list_splice_tail_init(rq_list, &hctx_list);
  
  dispatch:
  	return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}

#define BLK_MQ_BUDGET_DELAY	3		/* ms units */

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() returns BLK_STS_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
  static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	struct elevator_queue *e = q->elevator;
  	bool multi_hctxs = false, run_queue = false;
  	bool dispatched = false, busy = false;
  	unsigned int max_dispatch;
  	LIST_HEAD(rq_list);
  	int count = 0;
  
  	if (hctx->dispatch_busy)
  		max_dispatch = 1;
  	else
  		max_dispatch = hctx->queue->nr_requests;
  
  	do {
  		struct request *rq;
  		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
  			break;

  		if (!list_empty_careful(&hctx->dispatch)) {
  			busy = true;
  			break;
  		}
  		if (!blk_mq_get_dispatch_budget(q))
  			break;

  		rq = e->type->ops.dispatch_request(hctx);
  		if (!rq) {
  			blk_mq_put_dispatch_budget(q);
  			/*
  			 * We're releasing without dispatching. Holding the
  			 * budget could have blocked any "hctx"s with the
  			 * same queue and if we didn't dispatch then there's
  			 * no guarantee anyone will kick the queue.  Kick it
  			 * ourselves.
  			 */
  			run_queue = true;
  			break;
  		}
  
  		/*
  		 * Now this rq owns the budget which has to be released
  		 * if this rq won't be queued to driver via .queue_rq()
  		 * in blk_mq_dispatch_rq_list().
  		 */
  		list_add_tail(&rq->queuelist, &rq_list);
  		if (rq->mq_hctx != hctx)
  			multi_hctxs = true;
  	} while (++count < max_dispatch);
  
  	if (!count) {
  		if (run_queue)
  			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
  	} else if (multi_hctxs) {
  		/*
  		 * Requests from different hctx may be dequeued from some
  		 * schedulers, such as bfq and deadline.
  		 *
  		 * Sort the requests in the list according to their hctx,
  		 * dispatch batching requests from same hctx at a time.
  		 */
  		list_sort(NULL, &rq_list, sched_rq_cmp);
  		do {
  			dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
  		} while (!list_empty(&rq_list));
  	} else {
  		dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
  	}
  
  	if (busy)
  		return -EAGAIN;
  	return !!dispatched;
  }
  
  static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
  {
  	int ret;
  
  	do {
  		ret = __blk_mq_do_dispatch_sched(hctx);
  	} while (ret == 1);
  
  	return ret;
  }
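
/* Round-robin helper: return the software queue that follows @ctx on @hctx. */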
  static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
  					  struct blk_mq_ctx *ctx)
  {
  	unsigned short idx = ctx->index_hw[hctx->type];
  
  	if (++idx == hctx->nr_ctx)
  		idx = 0;
  
  	return hctx->ctxs[idx];
}

/*
 * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
 * its queue by itself in its completion handler, so we don't need to
 * restart queue if .get_budget() returns BLK_STS_RESOURCE.
 *
 * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
 * be run again.  This is necessary to avoid starving flushes.
 */
  static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	LIST_HEAD(rq_list);
  	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
  	int ret = 0;
  	struct request *rq;
  
  	do {
  		if (!list_empty_careful(&hctx->dispatch)) {
  			ret = -EAGAIN;
  			break;
  		}
  		if (!sbitmap_any_bit_set(&hctx->ctx_map))
  			break;
  		if (!blk_mq_get_dispatch_budget(q))
  			break;
  
  		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
  		if (!rq) {
  			blk_mq_put_dispatch_budget(q);
  			/*
  			 * We're releasing without dispatching. Holding the
  			 * budget could have blocked any "hctx"s with the
  			 * same queue and if we didn't dispatch then there's
  			 * no guarantee anyone will kick the queue.  Kick it
  			 * ourselves.
  			 */
  			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
  			break;
  		}
  
  		/*
  		 * Now this rq owns the budget which has to be released
  		 * if this rq won't be queued to driver via .queue_rq()
  		 * in blk_mq_dispatch_rq_list().
  		 */
  		list_add(&rq->queuelist, &rq_list);
  
  		/* round robin for fair dispatch */
  		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
  	} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
  
  	WRITE_ONCE(hctx->dispatch_from, ctx);
  	return ret;
  }
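
/*
 * Run one dispatch pass for @hctx. Returns -EAGAIN if hctx->dispatch was
 * found non-empty, in which case the caller must run the queue again to
 * avoid starving flushes.
 */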
  static int __blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	struct elevator_queue *e = q->elevator;
  	const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
  	int ret = 0;
  	LIST_HEAD(rq_list);
  	/*
  	 * If we have previous entries on our dispatch list, grab them first for
  	 * more fair dispatch.
  	 */
  	if (!list_empty_careful(&hctx->dispatch)) {
  		spin_lock(&hctx->lock);
  		if (!list_empty(&hctx->dispatch))
  			list_splice_init(&hctx->dispatch, &rq_list);
  		spin_unlock(&hctx->lock);
  	}
  
  	/*
  	 * Only ask the scheduler for requests, if we didn't have residual
  	 * requests from the dispatch list. This is to avoid the case where
  	 * we only ever dispatch a fraction of the requests available because
  	 * of low device queue depth. Once we pull requests out of the IO
  	 * scheduler, we can no longer merge or sort them. So it's best to
  	 * leave them there for as long as we can. Mark the hw queue as
  	 * needing a restart in that case.
  	 *
  	 * We want to dispatch from the scheduler if there was nothing
  	 * on the dispatch list or we were able to dispatch from the
  	 * dispatch list.
  	 */
  	if (!list_empty(&rq_list)) {
  		blk_mq_sched_mark_restart_hctx(hctx);
  		if (blk_mq_dispatch_rq_list(hctx, &rq_list, 0)) {
  			if (has_sched_dispatch)
  				ret = blk_mq_do_dispatch_sched(hctx);
  			else
  				ret = blk_mq_do_dispatch_ctx(hctx);
  		}
  	} else if (has_sched_dispatch) {
  		ret = blk_mq_do_dispatch_sched(hctx);
  	} else if (hctx->dispatch_busy) {
  		/* dequeue request one by one from sw queue if queue is busy */
  		ret = blk_mq_do_dispatch_ctx(hctx);
  	} else {
  		blk_mq_flush_busy_ctxs(hctx, &rq_list);
  		blk_mq_dispatch_rq_list(hctx, &rq_list, 0);
  	}
  
  	return ret;
  }
  
  void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  
  	/* RCU or SRCU read lock is needed before checking quiesced flag */
  	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
  		return;
  
  	hctx->run++;
  
  	/*
  	 * A return of -EAGAIN is an indication that hctx->dispatch is not
  	 * empty and we must run again in order to avoid starving flushes.
  	 */
  	if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN) {
  		if (__blk_mq_sched_dispatch_requests(hctx) == -EAGAIN)
  			blk_mq_run_hw_queue(hctx, true);
  	}
  }
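
/*
 * Try to merge @bio into an already-queued request, either through the
 * elevator's ->bio_merge() hook or, failing that, against this CPU's
 * software queue. Returns true if the bio was merged.
 */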
  bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
  		unsigned int nr_segs)
  {
  	struct elevator_queue *e = q->elevator;
  	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
  	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
  	bool ret = false;
  	enum hctx_type type;

  	if (e && e->type->ops.bio_merge)
  		return e->type->ops.bio_merge(hctx, bio, nr_segs);

  	type = hctx->type;
  	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
  	    list_empty_careful(&ctx->rq_lists[type]))
  		return false;
  
  	/* default per sw-queue merge */
  	spin_lock(&ctx->lock);
  	/*
  	 * Reverse check our software queue for entries that we could
  	 * potentially merge with. Currently includes a hand-wavy stop
  	 * count of 8, to not spend too much time checking for merges.
  	 */
  	if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) {
  		ctx->rq_merged++;
  		ret = true;
  	}
  	spin_unlock(&ctx->lock);
  	return ret;
  }
  
  bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
  {
  	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
  
  void blk_mq_sched_request_inserted(struct request *rq)
  {
  	trace_block_rq_insert(rq->q, rq);
  }
  EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
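
/*
 * Returns true if @rq must skip the I/O scheduler and be inserted into
 * hctx->dispatch directly (flush-sequence and passthrough requests).
 */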
  static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
  				       bool has_sched,
  				       struct request *rq)
  {
	/*
	 * Dispatch flush and passthrough requests directly.
	 *
	 * A passthrough request has to be added to hctx->dispatch directly:
	 * the device may be in a state where it cannot handle FS requests, so
	 * STS_RESOURCE is always returned and FS requests end up on
	 * hctx->dispatch, while a passthrough request may be required at that
	 * very time to fix the problem. If the passthrough request were added
	 * to the scheduler queue, there would be no chance to dispatch it,
	 * since requests in hctx->dispatch are prioritized.
	 */
  	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
  		return true;

  	if (has_sched)
  		rq->rq_flags |= RQF_SORTED;

  	return false;
  }

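/*
 * Insert a single request. Flush and passthrough requests bypass the
 * scheduler and go straight to hctx->dispatch; everything else is handed to
 * the elevator's ->insert_requests() or placed on the software queue.
 */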
  void blk_mq_sched_insert_request(struct request *rq, bool at_head,
  				 bool run_queue, bool async)
  {
  	struct request_queue *q = rq->q;
  	struct elevator_queue *e = q->elevator;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

  	WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));

  	if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
		/*
		 * Firstly, normal I/O requests are inserted into the scheduler
		 * queue or the sw queue, while flush requests are added to the
		 * dispatch queue (hctx->dispatch) directly. Since there is at
		 * most one in-flight flush request per hw queue, it does not
		 * matter whether the flush request is added to the tail or the
		 * front of the dispatch queue.
		 *
		 * Secondly, in case of NCQ, a flush request is a non-NCQ
		 * command and queueing it fails while any normal I/O request
		 * (NCQ command) is in flight. Adding the flush rq to the front
		 * of hctx->dispatch tends to add a little latency to that rq
		 * (because of S_SCHED_RESTART) compared with adding it to the
		 * tail, which increases the chance of flush merging, so fewer
		 * flush requests are issued to the controller. It is observed
		 * that ~10% of time is saved in blktests block/004 on a disk
		 * attached to an AHCI/NCQ drive when the flush rq is added to
		 * the front of hctx->dispatch.
		 *
		 * Simply queue the flush rq to the front of hctx->dispatch so
		 * that intensive flush workloads can benefit in case of NCQ HW.
		 */
  		at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
  		blk_mq_request_bypass_insert(rq, at_head, false);
  		goto run;
  	}

  	if (e && e->type->ops.insert_requests) {
  		LIST_HEAD(list);
  
  		list_add(&rq->queuelist, &list);
  		e->type->ops.insert_requests(hctx, &list, at_head);
  	} else {
  		spin_lock(&ctx->lock);
  		__blk_mq_insert_request(hctx, rq, at_head);
  		spin_unlock(&ctx->lock);
  	}
  run:
  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, async);
}

void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
  				  struct blk_mq_ctx *ctx,
  				  struct list_head *list, bool run_queue_async)
  {
  	struct elevator_queue *e;
  	struct request_queue *q = hctx->queue;
  
  	/*
  	 * blk_mq_sched_insert_requests() is called from flush plug
  	 * context only, and hold one usage counter to prevent queue
  	 * from being released.
  	 */
  	percpu_ref_get(&q->q_usage_counter);

  	e = hctx->queue->elevator;
  	if (e && e->type->ops.insert_requests)
  		e->type->ops.insert_requests(hctx, list, false);
  	else {
  		/*
  		 * try to issue requests directly if the hw queue isn't
  		 * busy in case of 'none' scheduler, and this way may save
  		 * us one extra enqueue & dequeue to sw queue.
  		 */
  		if (!hctx->dispatch_busy && !e && !run_queue_async) {
  			blk_mq_try_issue_list_directly(hctx, list);
  			if (list_empty(list))
  				goto out;
  		}
  		blk_mq_insert_requests(hctx, ctx, list);
  	}
  
  	blk_mq_run_hw_queue(hctx, run_queue_async);
   out:
  	percpu_ref_put(&q->q_usage_counter);
  }
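
/* Free one hardware queue's scheduler tag map and the requests backing it. */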
  static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
  				   struct blk_mq_hw_ctx *hctx,
  				   unsigned int hctx_idx)
  {
  	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;

  	if (hctx->sched_tags) {
  		blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
  		blk_mq_free_rq_map(hctx->sched_tags, flags);
  		hctx->sched_tags = NULL;
  	}
  }
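
/*
 * Allocate a scheduler tag map for one hardware queue, sized to
 * q->nr_requests, and populate it with requests.
 */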
  static int blk_mq_sched_alloc_tags(struct request_queue *q,
  				   struct blk_mq_hw_ctx *hctx,
  				   unsigned int hctx_idx)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	/* Clear HCTX_SHARED so tags are init'ed */
  	unsigned int flags = set->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;
  	int ret;
  
  	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
  					       set->reserved_tags, flags);
  	if (!hctx->sched_tags)
  		return -ENOMEM;
  
  	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
  	if (ret)
  		blk_mq_sched_free_tags(set, hctx, hctx_idx);
  
  	return ret;
  }
  /* called in queue's release handler, tagset has gone away */
  static void blk_mq_sched_tags_teardown(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  	queue_for_each_hw_ctx(q, hctx, i) {
  		/* Clear HCTX_SHARED so tags are freed */
  		unsigned int flags = hctx->flags & ~BLK_MQ_F_TAG_HCTX_SHARED;

  		if (hctx->sched_tags) {
  			blk_mq_free_rq_map(hctx->sched_tags, flags);
  			hctx->sched_tags = NULL;
  		}
  	}
  }
  
  int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct elevator_queue *eq;
  	unsigned int i;
  	int ret;
  
  	if (!e) {
  		q->elevator = NULL;
  		q->nr_requests = q->tag_set->queue_depth;
  		return 0;
  	}
  
  	/*
	 * Default to double the smaller of the hw queue depth and 128, since
	 * we don't split into sync/async like the old code did. Additionally,
	 * this is a per-hw queue depth.
	 */
  	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
  				   BLKDEV_MAX_RQ);

  	queue_for_each_hw_ctx(q, hctx, i) {
  		ret = blk_mq_sched_alloc_tags(q, hctx, i);
  		if (ret)
  			goto err;
  	}
  	ret = e->ops.init_sched(q, e);
  	if (ret)
  		goto err;

  	blk_mq_debugfs_register_sched(q);
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (e->ops.init_hctx) {
  			ret = e->ops.init_hctx(hctx, i);
  			if (ret) {
  				eq = q->elevator;
  				blk_mq_sched_free_requests(q);
  				blk_mq_exit_sched(q, eq);
  				kobject_put(&eq->kobj);
  				return ret;
  			}
  		}
  		blk_mq_debugfs_register_sched_hctx(q, hctx);
  	}
  	return 0;

  err:
  	blk_mq_sched_free_requests(q);
  	blk_mq_sched_tags_teardown(q);
  	q->elevator = NULL;
  	return ret;
  }

  /*
   * called in either blk_queue_cleanup or elevator_switch, tagset
   * is required for freeing requests
   */
  void blk_mq_sched_free_requests(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (hctx->sched_tags)
  			blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
  	}
}

void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  	queue_for_each_hw_ctx(q, hctx, i) {
  		blk_mq_debugfs_unregister_sched_hctx(hctx);
  		if (e->type->ops.exit_hctx && hctx->sched_data) {
  			e->type->ops.exit_hctx(hctx, i);
  			hctx->sched_data = NULL;
  		}
  	}
  	blk_mq_debugfs_unregister_sched(q);
  	if (e->type->ops.exit_sched)
  		e->type->ops.exit_sched(e);
  	blk_mq_sched_tags_teardown(q);
  	q->elevator = NULL;
  }