block/blk-mq.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Block multiqueue core code
 *
 * Copyright (C) 2013-2014 Jens Axboe
 * Copyright (C) 2013-2014 Christoph Hellwig
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/llist.h>
#include <linux/list_sort.h>
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/topology.h>
#include <linux/sched/signal.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
#include <linux/blk-crypto.h>

#include <trace/events/block.h>

#include <linux/blk-mq.h>
#include <linux/t10-pi.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
#include "blk-pm.h"
#include "blk-stat.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);

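/*
 * Map a request to a poll-stats bucket: two buckets (read/write) per
 * power-of-two size class, clamped to the BLK_MQ_POLL_STATS_BKTS range.
 */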
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
	int ddir, sectors, bucket;

	ddir = rq_data_dir(rq);
	sectors = blk_rq_stats_sectors(rq);

	bucket = ddir + 2 * ilog2(sectors);

	if (bucket < 0)
		return -1;
	else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
		return ddir + BLK_MQ_POLL_STATS_BKTS - 2;

	return bucket;
}

/*
 * Check if any of the ctx, dispatch list or elevator
 * has pending work in this hardware queue.
 */
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
	return !list_empty_careful(&hctx->dispatch) ||
		sbitmap_any_bit_set(&hctx->ctx_map) ||
			blk_mq_sched_has_work(hctx);
}

/*
 * Mark this ctx as having pending work in this hardware queue
 */
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
				     struct blk_mq_ctx *ctx)
{
	const int bit = ctx->index_hw[hctx->type];

	if (!sbitmap_test_bit(&hctx->ctx_map, bit))
		sbitmap_set_bit(&hctx->ctx_map, bit);
}

static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
				      struct blk_mq_ctx *ctx)
{
	const int bit = ctx->index_hw[hctx->type];

	sbitmap_clear_bit(&hctx->ctx_map, bit);
}

struct mq_inflight {
	struct hd_struct *part;
	unsigned int inflight[2];
};

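/*
 * Tag iterator callback: count requests that are in flight for the given
 * partition, split by data direction, into struct mq_inflight.
 */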
static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
				  struct request *rq, void *priv,
				  bool reserved)
{
	struct mq_inflight *mi = priv;
	if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
		mi->inflight[rq_data_dir(rq)]++;

	return true;
}

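/* Return the total number of in-flight requests (reads + writes) for @part. */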
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);

	return mi.inflight[0] + mi.inflight[1];
}

void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
			 unsigned int inflight[2])
{
	struct mq_inflight mi = { .part = part };

	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
	inflight[0] = mi.inflight[0];
	inflight[1] = mi.inflight[1];
}

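/*
 * Start freezing a queue: the first freezer kills q_usage_counter and runs
 * the hardware queues so pending requests get dispatched and the counter
 * can drain to zero.
 */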
void blk_freeze_queue_start(struct request_queue *q)
{
	mutex_lock(&q->mq_freeze_lock);
	if (++q->mq_freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
		mutex_unlock(&q->mq_freeze_lock);
		if (queue_is_mq(q))
			blk_mq_run_hw_queues(q, false);
	} else {
		mutex_unlock(&q->mq_freeze_lock);
	}
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);

void blk_mq_freeze_queue_wait(struct request_queue *q)
{
	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);

int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
				     unsigned long timeout)
{
	return wait_event_timeout(q->mq_freeze_wq,
					percpu_ref_is_zero(&q->q_usage_counter),
					timeout);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);

/*
 * Guarantee no request is in use, so we can change any data structure of
 * the queue afterward.
 */
void blk_freeze_queue(struct request_queue *q)
{
	/*
	 * In the !blk_mq case we are only calling this to kill the
	 * q_usage_counter, otherwise this increases the freeze depth
	 * and waits for it to return to zero.  For this reason there is
	 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
	 * exported to drivers as the only user for unfreeze is blk_mq.
	 */
	blk_freeze_queue_start(q);
	blk_mq_freeze_queue_wait(q);
}

void blk_mq_freeze_queue(struct request_queue *q)
{
	/*
	 * ...just an alias to keep freeze and unfreeze actions balanced
	 * in the blk_mq_* namespace
	 */
	blk_freeze_queue(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

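/*
 * Drop one freeze reference; once the freeze depth reaches zero the
 * q_usage_counter is resurrected and all freeze waiters are woken.
 */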
void blk_mq_unfreeze_queue(struct request_queue *q)
{
	mutex_lock(&q->mq_freeze_lock);
	q->mq_freeze_depth--;
	WARN_ON_ONCE(q->mq_freeze_depth < 0);
	if (!q->mq_freeze_depth) {
		percpu_ref_resurrect(&q->q_usage_counter);
		wake_up_all(&q->mq_freeze_wq);
	}
	mutex_unlock(&q->mq_freeze_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

/*
 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
 * mpt3sas driver such that this function can be removed.
 */
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);

/**
 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
 * @q: request queue.
 *
 * Note: this function does not prevent the struct request end_io()
 * callback from being invoked. Once this function has returned, we make
 * sure no dispatch can happen until the queue is unquiesced via
 * blk_mq_unquiesce_queue().
 */
void blk_mq_quiesce_queue(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;
	bool rcu = false;
	blk_mq_quiesce_queue_nowait(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		if (hctx->flags & BLK_MQ_F_BLOCKING)
			synchronize_srcu(hctx->srcu);
		else
			rcu = true;
	}
	if (rcu)
		synchronize_rcu();
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

/*
 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
 * @q: request queue.
 *
 * This function restores the queue to the state it was in before
 * blk_mq_quiesce_queue() was called.
 */
void blk_mq_unquiesce_queue(struct request_queue *q)
{
	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);

	/* dispatch requests which are inserted during quiescing */
	blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);

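/* Wake up all tag-space waiters on every mapped hardware queue. */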
void blk_mq_wake_waiters(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i)
		if (blk_mq_hw_queue_mapped(hctx))
			blk_mq_tag_wakeup_all(hctx->tags, true);
}

/*
 * Only need start/end time stamping if we have iostat or
 * blk stats enabled, or using an IO scheduler.
 */
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
}

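/*
 * Initialize a freshly allocated request from the allocation data: assign
 * the (internal) tag, reset fields and timestamps, and let an attached
 * elevator prepare its private data for non-flush requests.
 */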
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
		unsigned int tag, u64 alloc_time_ns)
{
	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
	struct request *rq = tags->static_rqs[tag];

	if (data->q->elevator) {
		rq->tag = BLK_MQ_NO_TAG;
		rq->internal_tag = tag;
	} else {
		rq->tag = tag;
		rq->internal_tag = BLK_MQ_NO_TAG;
	}
	/* csd/requeue_work/fifo_time is initialized before use */
	rq->q = data->q;
	rq->mq_ctx = data->ctx;
	rq->mq_hctx = data->hctx;
	rq->rq_flags = 0;
	rq->cmd_flags = data->cmd_flags;
	if (data->flags & BLK_MQ_REQ_PM)
		rq->rq_flags |= RQF_PM;
	if (blk_queue_io_stat(data->q))
		rq->rq_flags |= RQF_IO_STAT;
	INIT_LIST_HEAD(&rq->queuelist);
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->rq_disk = NULL;
	rq->part = NULL;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
	rq->alloc_time_ns = alloc_time_ns;
#endif
	if (blk_mq_need_time_stamp(rq))
		rq->start_time_ns = ktime_get_ns();
	else
		rq->start_time_ns = 0;
	rq->io_start_time_ns = 0;
	rq->stats_sectors = 0;
	rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
	rq->nr_integrity_segments = 0;
#endif
	blk_crypto_rq_set_defaults(rq);
	/* tag was already set */
	WRITE_ONCE(rq->deadline, 0);

	rq->timeout = 0;
	rq->end_io = NULL;
	rq->end_io_data = NULL;

	data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
	refcount_set(&rq->ref, 1);

	if (!op_is_flush(data->cmd_flags)) {
		struct elevator_queue *e = data->q->elevator;

		rq->elv.icq = NULL;
		if (e && e->type->ops.prepare_request) {
			if (e->type->icq_cache)
				blk_mq_sched_assign_ioc(rq);

			e->type->ops.prepare_request(rq);
			rq->rq_flags |= RQF_ELVPRIV;
		}
	}

	data->hctx->queued++;
	return rq;
}

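/*
 * Core allocation path: pick a software/hardware context, get a tag
 * (retrying if the hctx went inactive under us), then initialize the
 * request.
 */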
static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
{
	struct request_queue *q = data->q;
	struct elevator_queue *e = q->elevator;
	u64 alloc_time_ns = 0;
	unsigned int tag;

	/* alloc_time includes depth and tag waits */
	if (blk_queue_rq_alloc_time(q))
		alloc_time_ns = ktime_get_ns();
	if (data->cmd_flags & REQ_NOWAIT)
		data->flags |= BLK_MQ_REQ_NOWAIT;

	if (e) {
		/*
		 * Flush requests are special and go directly to the
		 * dispatch list. Don't include reserved tags in the
		 * limiting, as it isn't useful.
		 */
		if (!op_is_flush(data->cmd_flags) &&
		    e->type->ops.limit_depth &&
		    !(data->flags & BLK_MQ_REQ_RESERVED))
			e->type->ops.limit_depth(data->cmd_flags, data);
	}
retry:
	data->ctx = blk_mq_get_ctx(q);
	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
	if (!e)
		blk_mq_tag_busy(data->hctx);
	/*
	 * Waiting allocations only fail because of an inactive hctx.  In that
	 * case just retry the hctx assignment and tag allocation as CPU hotplug
	 * should have migrated us to an online CPU by now.
	 */
	tag = blk_mq_get_tag(data);
	if (tag == BLK_MQ_NO_TAG) {
		if (data->flags & BLK_MQ_REQ_NOWAIT)
			return NULL;

		/*
		 * Give up the CPU and sleep for a random short time to ensure
		 * that threads using a realtime scheduling class are migrated
		 * off the CPU, and thus off the hctx that is going away.
		 */
		msleep(3);
		goto retry;
	}
	return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
}

struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
		blk_mq_req_flags_t flags)
{
	struct blk_mq_alloc_data data = {
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
	};
	struct request *rq;
	int ret;

	ret = blk_queue_enter(q, flags);
	if (ret)
		return ERR_PTR(ret);

	rq = __blk_mq_alloc_request(&data);
	if (!rq)
		goto out_queue_exit;
	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
out_queue_exit:
	blk_queue_exit(q);
	return ERR_PTR(-EWOULDBLOCK);
}
EXPORT_SYMBOL(blk_mq_alloc_request);

struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
	unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
{
	struct blk_mq_alloc_data data = {
		.q		= q,
		.flags		= flags,
		.cmd_flags	= op,
	};
	u64 alloc_time_ns = 0;
	unsigned int cpu;
	unsigned int tag;
	int ret;

	/* alloc_time includes depth and tag waits */
	if (blk_queue_rq_alloc_time(q))
		alloc_time_ns = ktime_get_ns();
	/*
	 * If the tag allocator sleeps we could get an allocation for a
	 * different hardware context.  No need to complicate the low level
	 * allocator for this for the rare use case of a command tied to
	 * a specific queue.
	 */
	if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
		return ERR_PTR(-EINVAL);

	if (hctx_idx >= q->nr_hw_queues)
		return ERR_PTR(-EIO);
	ret = blk_queue_enter(q, flags);
	if (ret)
		return ERR_PTR(ret);
	/*
	 * Check if the hardware context is actually mapped to anything.
	 * If not tell the caller that it should skip this queue.
	 */
	ret = -EXDEV;
	data.hctx = q->queue_hw_ctx[hctx_idx];
	if (!blk_mq_hw_queue_mapped(data.hctx))
		goto out_queue_exit;
	cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
	data.ctx = __blk_mq_get_ctx(q, cpu);

	if (!q->elevator)
		blk_mq_tag_busy(data.hctx);
	ret = -EWOULDBLOCK;
	tag = blk_mq_get_tag(&data);
	if (tag == BLK_MQ_NO_TAG)
		goto out_queue_exit;
	return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);
out_queue_exit:
	blk_queue_exit(q);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

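/*
 * Final teardown once the last reference to a request is dropped: return
 * the driver and scheduler tags, allow the hardware queue to restart, and
 * drop the queue usage reference taken at allocation time.
 */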
static void __blk_mq_free_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
	const int sched_tag = rq->internal_tag;
	blk_crypto_free_request(rq);
	blk_pm_mark_last_busy(rq);
	rq->mq_hctx = NULL;
	if (rq->tag != BLK_MQ_NO_TAG)
		blk_mq_put_tag(hctx->tags, ctx, rq->tag);
	if (sched_tag != BLK_MQ_NO_TAG)
		blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
	blk_mq_sched_restart(hctx);
	blk_queue_exit(q);
}

void blk_mq_free_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	if (rq->rq_flags & RQF_ELVPRIV) {
		if (e && e->type->ops.finish_request)
			e->type->ops.finish_request(rq);
		if (rq->elv.icq) {
			put_io_context(rq->elv.icq->ioc);
			rq->elv.icq = NULL;
		}
	}

	ctx->rq_completed[rq_is_sync(rq)]++;
	if (rq->rq_flags & RQF_MQ_INFLIGHT)
		__blk_mq_dec_active_requests(hctx);

	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
		laptop_io_completion(q->backing_dev_info);
	rq_qos_done(q, rq);

	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
	if (refcount_dec_and_test(&rq->ref))
		__blk_mq_free_request(rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);

inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
{
	u64 now = 0;

	if (blk_mq_need_time_stamp(rq))
		now = ktime_get_ns();

	if (rq->rq_flags & RQF_STATS) {
		blk_mq_poll_stats_start(rq->q);
		blk_stat_add(rq, now);
	}
	blk_mq_sched_completed_request(rq, now);

	blk_account_io_done(rq, now);

	if (rq->end_io) {
		rq_qos_done(rq->q, rq);
		rq->end_io(rq, error);
	} else {
		blk_mq_free_request(rq);
	}
}
EXPORT_SYMBOL(__blk_mq_end_request);

void blk_mq_end_request(struct request *rq, blk_status_t error)
{
	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
		BUG();
	__blk_mq_end_request(rq, error);
}
EXPORT_SYMBOL(blk_mq_end_request);

/*
 * Softirq action handler - move entries to local list and loop over them
 * while passing them to the queue registered handler.
 */
static __latent_entropy void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list, local_list;

	local_irq_disable();
	cpu_list = this_cpu_ptr(&blk_cpu_done);
	list_replace_init(cpu_list, &local_list);
	local_irq_enable();

	while (!list_empty(&local_list)) {
		struct request *rq;

		rq = list_entry(local_list.next, struct request, ipi_list);
		list_del_init(&rq->ipi_list);
		rq->q->mq_ops->complete(rq);
	}
}

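/*
 * Queue @rq on this CPU's blk_cpu_done list and raise BLOCK_SOFTIRQ if the
 * list was empty, so blk_done_softirq() will complete it.
 */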
static void blk_mq_trigger_softirq(struct request *rq)
{
	struct list_head *list;
	unsigned long flags;

	local_irq_save(flags);
	list = this_cpu_ptr(&blk_cpu_done);
	list_add_tail(&rq->ipi_list, list);
	/*
	 * If the list only contains our just added request, signal a raise of
	 * the softirq.  If there are already entries there, someone already
	 * raised the irq but it hasn't run yet.
	 */
	if (list->next == &rq->ipi_list)
		raise_softirq_irqoff(BLOCK_SOFTIRQ);
	local_irq_restore(flags);
}

static int blk_softirq_cpu_dead(unsigned int cpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	local_irq_disable();
	list_splice_init(&per_cpu(blk_cpu_done, cpu),
			 this_cpu_ptr(&blk_cpu_done));
	raise_softirq_irqoff(BLOCK_SOFTIRQ);
	local_irq_enable();

	return 0;
}

static void __blk_mq_complete_request_remote(void *data)
{
	struct request *rq = data;

	/*
	 * For most single queue controllers, there is only one irq vector
	 * for handling I/O completion, and the only irq's affinity is set
	 * to all possible CPUs.  On most architectures, this affinity means
	 * the irq is handled on one specific CPU.
	 *
	 * So complete I/O requests in softirq context in case of single queue
	 * devices to avoid degrading I/O performance due to irqsoff latency.
	 */
	if (rq->q->nr_hw_queues == 1)
		blk_mq_trigger_softirq(rq);
	else
		rq->q->mq_ops->complete(rq);
}

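/*
 * Decide whether a completion should be redirected to the submitting CPU
 * via IPI instead of being handled on the current CPU.
 */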
static inline bool blk_mq_complete_need_ipi(struct request *rq)
{
	int cpu = raw_smp_processor_id();

	if (!IS_ENABLED(CONFIG_SMP) ||
	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
		return false;

	/* same CPU or cache domain?  Complete locally */
	if (cpu == rq->mq_ctx->cpu ||
	    (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags) &&
	     cpus_share_cache(cpu, rq->mq_ctx->cpu)))
		return false;

	/* don't try to IPI to an offline CPU */
	return cpu_online(rq->mq_ctx->cpu);
}

bool blk_mq_complete_request_remote(struct request *rq)
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);

	/*
	 * For a polled request, always complete locally, it's pointless
	 * to redirect the completion.
	 */
	if (rq->cmd_flags & REQ_HIPRI)
		return false;

	if (blk_mq_complete_need_ipi(rq)) {
		rq->csd.func = __blk_mq_complete_request_remote;
		rq->csd.info = rq;
		rq->csd.flags = 0;
		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
	} else {
		if (rq->q->nr_hw_queues > 1)
			return false;
		blk_mq_trigger_softirq(rq);
	}

	return true;
}
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);

/**
 * blk_mq_complete_request - end I/O on a request
 * @rq:		the request being processed
 *
 * Description:
 *	Complete a request by scheduling the ->complete_rq operation.
 **/
void blk_mq_complete_request(struct request *rq)
{
	if (!blk_mq_complete_request_remote(rq))
		rq->q->mq_ops->complete(rq);
}
EXPORT_SYMBOL(blk_mq_complete_request);

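/*
 * hctx_lock()/hctx_unlock() bracket a dispatch section: plain RCU for
 * regular hardware queues, SRCU when BLK_MQ_F_BLOCKING is set.
 */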
static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
	__releases(hctx->srcu)
{
	if (!(hctx->flags & BLK_MQ_F_BLOCKING))
		rcu_read_unlock();
	else
		srcu_read_unlock(hctx->srcu, srcu_idx);
}

static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
	__acquires(hctx->srcu)
{
	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
		/* shut up gcc false positive */
		*srcu_idx = 0;
		rcu_read_lock();
	} else
		*srcu_idx = srcu_read_lock(hctx->srcu);
}

/**
 * blk_mq_start_request - Start processing a request
 * @rq: Pointer to request to be started
 *
 * Function used by device drivers to notify the block layer that a request
 * is going to be processed now, so the block layer can do proper
 * initializations such as starting the timeout timer.
 */
void blk_mq_start_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_issue(q, rq);
	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
		rq->io_start_time_ns = ktime_get_ns();
		rq->stats_sectors = blk_rq_sectors(rq);
		rq->rq_flags |= RQF_STATS;
		rq_qos_issue(q, rq);
	}
	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);

	blk_add_timer(rq);
	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);

#ifdef CONFIG_BLK_DEV_INTEGRITY
	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
		q->integrity.profile->prepare_fn(rq);
#endif
}
EXPORT_SYMBOL(blk_mq_start_request);

static void __blk_mq_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	blk_mq_put_driver_tag(rq);
	trace_block_rq_requeue(q, rq);
	rq_qos_requeue(q, rq);

	if (blk_mq_request_started(rq)) {
		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
		rq->rq_flags &= ~RQF_TIMED_OUT;
	}
}

void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
	__blk_mq_requeue_request(rq);

	/* this request will be re-inserted to io scheduler queue */
	blk_mq_sched_requeue_request(rq);
	BUG_ON(!list_empty(&rq->queuelist));
	blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
EXPORT_SYMBOL(blk_mq_requeue_request);

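/*
 * Work handler that drains the requeue list: requests carrying driver data
 * (RQF_DONTPREP) go straight to the dispatch list, everything else is
 * re-inserted through the I/O scheduler, then the queues are re-run.
 */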
static void blk_mq_requeue_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, requeue_work.work);
	LIST_HEAD(rq_list);
	struct request *rq, *next;

	spin_lock_irq(&q->requeue_lock);
	list_splice_init(&q->requeue_list, &rq_list);
	spin_unlock_irq(&q->requeue_lock);

	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
		if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
			continue;
		rq->rq_flags &= ~RQF_SOFTBARRIER;
		list_del_init(&rq->queuelist);
		/*
		 * If RQF_DONTPREP is set, rq already contains driver-specific
		 * data, so insert it into the hctx dispatch list to avoid any
		 * merge.
		 */
		if (rq->rq_flags & RQF_DONTPREP)
			blk_mq_request_bypass_insert(rq, false, false);
		else
			blk_mq_sched_insert_request(rq, true, false, false);
	}

	while (!list_empty(&rq_list)) {
		rq = list_entry(rq_list.next, struct request, queuelist);
		list_del_init(&rq->queuelist);
		blk_mq_sched_insert_request(rq, false, false, false);
	}
	blk_mq_run_hw_queues(q, false);
}

void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
				bool kick_requeue_list)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	/*
	 * We abuse this flag that is otherwise used by the I/O scheduler to
	 * request head insertion from the workqueue.
	 */
	BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);

	spin_lock_irqsave(&q->requeue_lock, flags);
	if (at_head) {
		rq->rq_flags |= RQF_SOFTBARRIER;
		list_add(&rq->queuelist, &q->requeue_list);
	} else {
		list_add_tail(&rq->queuelist, &q->requeue_list);
	}
	spin_unlock_irqrestore(&q->requeue_lock, flags);

	if (kick_requeue_list)
		blk_mq_kick_requeue_list(q);
}

void blk_mq_kick_requeue_list(struct request_queue *q)
{
	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
}
EXPORT_SYMBOL(blk_mq_kick_requeue_list);

void blk_mq_delay_kick_requeue_list(struct request_queue *q,
				    unsigned long msecs)
{
	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
				    msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	if (tag < tags->nr_tags) {
		prefetch(tags->rqs[tag]);
		return tags->rqs[tag];
	}

	return NULL;
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);

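/*
 * Iterator callback for blk_mq_queue_inflight(): report whether any started
 * request belonging to this queue is still in flight.
 */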
static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
			       void *priv, bool reserved)
{
	/*
	 * If we find a request that isn't idle and the queue matches,
	 * we know the queue is busy. Return false to stop the iteration.
	 */
	if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
		bool *busy = priv;

		*busy = true;
		return false;
	}

	return true;
}

bool blk_mq_queue_inflight(struct request_queue *q)
{
	bool busy = false;
	blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy);
	return busy;
}
EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);

static void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
	req->rq_flags |= RQF_TIMED_OUT;
	if (req->q->mq_ops->timeout) {
		enum blk_eh_timer_return ret;

		ret = req->q->mq_ops->timeout(req, reserved);
		if (ret == BLK_EH_DONE)
			return;
		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
	}

	blk_add_timer(req);
}

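/*
 * Check whether @rq has passed its deadline; otherwise record the earliest
 * pending deadline in *next so the timeout timer can be re-armed.
 */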
  static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
81481eb42   Christoph Hellwig   blk-mq: fix and s...
857
  {
12f5b9314   Keith Busch   blk-mq: Remove ge...
858
  	unsigned long deadline;
87ee7b112   Jens Axboe   blk-mq: fix race ...
859

12f5b9314   Keith Busch   blk-mq: Remove ge...
860
861
  	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
  		return false;
da6612673   Christoph Hellwig   blk-mq: don't tim...
862
863
  	if (rq->rq_flags & RQF_TIMED_OUT)
  		return false;
a7af0af32   Peter Zijlstra   blk-mq: attempt t...
864

079076b34   Christoph Hellwig   block: remove dea...
865
  	deadline = READ_ONCE(rq->deadline);
12f5b9314   Keith Busch   blk-mq: Remove ge...
866
867
  	if (time_after_eq(jiffies, deadline))
  		return true;
a7af0af32   Peter Zijlstra   blk-mq: attempt t...
868

12f5b9314   Keith Busch   blk-mq: Remove ge...
869
870
871
872
873
  	if (*next == 0)
  		*next = deadline;
  	else if (time_after(*next, deadline))
  		*next = deadline;
  	return false;
87ee7b112   Jens Axboe   blk-mq: fix race ...
874
  }
7baa85727   Jens Axboe   blk-mq-tag: chang...
875
  static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
1d9bd5161   Tejun Heo   blk-mq: replace t...
876
877
  		struct request *rq, void *priv, bool reserved)
  {
12f5b9314   Keith Busch   blk-mq: Remove ge...
878
879
880
881
882
883
884
  	unsigned long *next = priv;
  
  	/*
  	 * Just do a quick check if it is expired before locking the request in
  	 * so we're not unnecessarilly synchronizing across CPUs.
  	 */
  	if (!blk_mq_req_expired(rq, next))
7baa85727   Jens Axboe   blk-mq-tag: chang...
885
  		return true;
12f5b9314   Keith Busch   blk-mq: Remove ge...
886
887
888
889
890
891
892
893
894
895
896
  
  	/*
  	 * We have reason to believe the request may be expired. Take a
  	 * reference on the request to lock this request lifetime into its
  	 * currently allocated context to prevent it from being reallocated in
  	 * the event the completion by-passes this timeout handler.
  	 *
  	 * If the reference was already released, then the driver beat the
  	 * timeout handler to posting a natural completion.
  	 */
  	if (!refcount_inc_not_zero(&rq->ref))
7baa85727   Jens Axboe   blk-mq-tag: chang...
897
  		return true;
12f5b9314   Keith Busch   blk-mq: Remove ge...
898

1d9bd5161   Tejun Heo   blk-mq: replace t...
899
  	/*
12f5b9314   Keith Busch   blk-mq: Remove ge...
900
901
902
903
  	 * The request is now locked and cannot be reallocated underneath the
  	 * timeout handler's processing. Re-verify this exact request is truly
  	 * expired; if it is not expired, then the request was completed and
  	 * reallocated as a new request.
1d9bd5161   Tejun Heo   blk-mq: replace t...
904
  	 */
12f5b9314   Keith Busch   blk-mq: Remove ge...
905
  	if (blk_mq_req_expired(rq, next))
1d9bd5161   Tejun Heo   blk-mq: replace t...
906
  		blk_mq_rq_timed_out(rq, reserved);
8d6996630   Yufen Yu   block: fix null p...
907
908
909
910
  
  	if (is_flush_rq(rq, hctx))
  		rq->end_io(rq, 0);
  	else if (refcount_dec_and_test(&rq->ref))
12f5b9314   Keith Busch   blk-mq: Remove ge...
911
  		__blk_mq_free_request(rq);
7baa85727   Jens Axboe   blk-mq-tag: chang...
912
913
  
  	return true;
1d9bd5161   Tejun Heo   blk-mq: replace t...
914
  }
287922eb0   Christoph Hellwig   block: defer time...
915
  static void blk_mq_timeout_work(struct work_struct *work)
320ae51fe   Jens Axboe   blk-mq: new multi...
916
  {
287922eb0   Christoph Hellwig   block: defer time...
917
918
  	struct request_queue *q =
  		container_of(work, struct request_queue, timeout_work);
12f5b9314   Keith Busch   blk-mq: Remove ge...
919
  	unsigned long next = 0;
1d9bd5161   Tejun Heo   blk-mq: replace t...
920
  	struct blk_mq_hw_ctx *hctx;
81481eb42   Christoph Hellwig   blk-mq: fix and s...
921
  	int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
922

71f79fb31   Gabriel Krisman Bertazi   blk-mq: Allow tim...
923
924
925
926
927
928
929
930
931
  	/* A deadlock might occur if a request is stuck requiring a
  	 * timeout at the same time a queue freeze is waiting
  	 * completion, since the timeout code would not be able to
  	 * acquire the queue reference here.
  	 *
  	 * That's why we don't use blk_queue_enter here; instead, we use
  	 * percpu_ref_tryget directly, because we need to be able to
  	 * obtain a reference even in the short window between the queue
  	 * starting to freeze, by dropping the first reference in
1671d522c   Ming Lei   block: rename blk...
932
  	 * blk_freeze_queue_start, and the moment the last request is
71f79fb31   Gabriel Krisman Bertazi   blk-mq: Allow tim...
933
934
935
936
  	 * consumed, marked by the instant q_usage_counter reaches
  	 * zero.
  	 */
  	if (!percpu_ref_tryget(&q->q_usage_counter))
287922eb0   Christoph Hellwig   block: defer time...
937
  		return;
12f5b9314   Keith Busch   blk-mq: Remove ge...
938
  	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
320ae51fe   Jens Axboe   blk-mq: new multi...
939

12f5b9314   Keith Busch   blk-mq: Remove ge...
940
941
  	if (next != 0) {
  		mod_timer(&q->timeout, next);
0d2602ca3   Jens Axboe   blk-mq: improve s...
942
  	} else {
fcd36c36f   Bart Van Assche   blk-mq: Explain w...
943
944
945
946
947
948
  		/*
  		 * Request timeouts are handled as a forward rolling timer. If
  		 * we end up here it means that no requests are pending and
  		 * also that no request has been pending for a while. Mark
  		 * each hctx as idle.
  		 */
f054b56c9   Ming Lei   blk-mq: fix race ...
949
950
951
952
953
  		queue_for_each_hw_ctx(q, hctx, i) {
  			/* the hctx may be unmapped, so check it here */
  			if (blk_mq_hw_queue_mapped(hctx))
  				blk_mq_tag_idle(hctx);
  		}
0d2602ca3   Jens Axboe   blk-mq: improve s...
954
  	}
287922eb0   Christoph Hellwig   block: defer time...
955
  	blk_queue_exit(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
956
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
957
958
959
960
961
962
963
964
965
966
  struct flush_busy_ctx_data {
  	struct blk_mq_hw_ctx *hctx;
  	struct list_head *list;
  };
  
  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
  {
  	struct flush_busy_ctx_data *flush_data = data;
  	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
  	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
967
  	enum hctx_type type = hctx->type;
88459642c   Omar Sandoval   blk-mq: abstract ...
968

88459642c   Omar Sandoval   blk-mq: abstract ...
969
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
970
  	list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
e9a99a638   Omar Sandoval   block: clear ctx ...
971
  	sbitmap_clear_bit(sb, bitnr);
88459642c   Omar Sandoval   blk-mq: abstract ...
972
973
974
  	spin_unlock(&ctx->lock);
  	return true;
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
975
  /*
1429d7c94   Jens Axboe   blk-mq: switch ct...
976
977
978
   * Process software queues that have been marked busy, splicing them
   * to the for-dispatch list.
   */
2c3ad6679   Jens Axboe   blk-mq: export so...
979
  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
1429d7c94   Jens Axboe   blk-mq: switch ct...
980
  {
88459642c   Omar Sandoval   blk-mq: abstract ...
981
982
983
984
  	struct flush_busy_ctx_data data = {
  		.hctx = hctx,
  		.list = list,
  	};
1429d7c94   Jens Axboe   blk-mq: switch ct...
985

88459642c   Omar Sandoval   blk-mq: abstract ...
986
  	sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
1429d7c94   Jens Axboe   blk-mq: switch ct...
987
  }
2c3ad6679   Jens Axboe   blk-mq: export so...
988
  EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
1429d7c94   Jens Axboe   blk-mq: switch ct...
989

b347689ff   Ming Lei   blk-mq-sched: imp...
990
991
992
993
994
995
996
997
998
999
1000
  struct dispatch_rq_data {
  	struct blk_mq_hw_ctx *hctx;
  	struct request *rq;
  };
  
  static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
  		void *data)
  {
  	struct dispatch_rq_data *dispatch_data = data;
  	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
  	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1001
  	enum hctx_type type = hctx->type;
b347689ff   Ming Lei   blk-mq-sched: imp...
1002
1003
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1004
1005
  	if (!list_empty(&ctx->rq_lists[type])) {
  		dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
b347689ff   Ming Lei   blk-mq-sched: imp...
1006
  		list_del_init(&dispatch_data->rq->queuelist);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1007
  		if (list_empty(&ctx->rq_lists[type]))
b347689ff   Ming Lei   blk-mq-sched: imp...
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
  			sbitmap_clear_bit(sb, bitnr);
  	}
  	spin_unlock(&ctx->lock);
  
  	return !dispatch_data->rq;
  }
  
  struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
  					struct blk_mq_ctx *start)
  {
f31967f0e   Jens Axboe   blk-mq: allow sof...
1018
  	unsigned off = start ? start->index_hw[hctx->type] : 0;
b347689ff   Ming Lei   blk-mq-sched: imp...
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
  	struct dispatch_rq_data data = {
  		.hctx = hctx,
  		.rq   = NULL,
  	};
  
  	__sbitmap_for_each_set(&hctx->ctx_map, off,
  			       dispatch_rq_from_ctx, &data);
  
  	return data.rq;
  }
703fd1c0f   Jens Axboe   blk-mq: account h...
1029
1030
1031
1032
  static inline unsigned int queued_to_index(unsigned int queued)
  {
  	if (!queued)
  		return 0;
1429d7c94   Jens Axboe   blk-mq: switch ct...
1033

703fd1c0f   Jens Axboe   blk-mq: account h...
1034
  	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
1429d7c94   Jens Axboe   blk-mq: switch ct...
1035
  }
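  /*
   * Illustrative mapping (not from the source; assumes
   * BLK_MQ_MAX_DISPATCH_ORDER == 7, i.e. buckets 0..6):
   *
   *	queued == 0      -> index 0
   *	queued == 1      -> index 1	(ilog2(1) + 1)
   *	queued == 2..3   -> index 2
   *	queued == 4..7   -> index 3
   *	queued == 8..15  -> index 4
   *	queued == 16..31 -> index 5
   *	queued >= 32     -> index 6	(capped)
   */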
570e9b73b   Ming Lei   blk-mq: move blk_...
1036
1037
  static bool __blk_mq_get_driver_tag(struct request *rq)
  {
222a5ae03   John Garry   blk-mq: Use point...
1038
  	struct sbitmap_queue *bt = rq->mq_hctx->tags->bitmap_tags;
570e9b73b   Ming Lei   blk-mq: move blk_...
1039
  	unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
570e9b73b   Ming Lei   blk-mq: move blk_...
1040
  	int tag;
568f27006   Ming Lei   blk-mq: centralis...
1041
  	blk_mq_tag_busy(rq->mq_hctx);
570e9b73b   Ming Lei   blk-mq: move blk_...
1042
  	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
222a5ae03   John Garry   blk-mq: Use point...
1043
  		bt = rq->mq_hctx->tags->breserved_tags;
570e9b73b   Ming Lei   blk-mq: move blk_...
1044
  		tag_offset = 0;
285008501   Ming Lei   blk-mq: always al...
1045
1046
1047
  	} else {
  		if (!hctx_may_queue(rq->mq_hctx, bt))
  			return false;
570e9b73b   Ming Lei   blk-mq: move blk_...
1048
  	}
570e9b73b   Ming Lei   blk-mq: move blk_...
1049
1050
1051
1052
1053
  	tag = __sbitmap_queue_get(bt);
  	if (tag == BLK_MQ_NO_TAG)
  		return false;
  
  	rq->tag = tag + tag_offset;
570e9b73b   Ming Lei   blk-mq: move blk_...
1054
1055
1056
1057
1058
  	return true;
  }
  
  static bool blk_mq_get_driver_tag(struct request *rq)
  {
568f27006   Ming Lei   blk-mq: centralis...
1059
1060
1061
1062
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  
  	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
  		return false;
51db1c37e   Ming Lei   blk-mq: Rename BL...
1063
  	if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
568f27006   Ming Lei   blk-mq: centralis...
1064
1065
  			!(rq->rq_flags & RQF_MQ_INFLIGHT)) {
  		rq->rq_flags |= RQF_MQ_INFLIGHT;
bccf5e26d   John Garry   blk-mq: Record nr...
1066
  		__blk_mq_inc_active_requests(hctx);
568f27006   Ming Lei   blk-mq: centralis...
1067
1068
1069
  	}
  	hctx->tags->rqs[rq->tag] = rq;
  	return true;
570e9b73b   Ming Lei   blk-mq: move blk_...
1070
  }
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1071
1072
  static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
  				int flags, void *key)
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1073
1074
1075
1076
  {
  	struct blk_mq_hw_ctx *hctx;
  
  	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
5815839b3   Ming Lei   blk-mq: introduce...
1077
  	spin_lock(&hctx->dispatch_wait_lock);
e86185754   Jens Axboe   blk-mq: fix sbitm...
1078
1079
1080
1081
  	if (!list_empty(&wait->entry)) {
  		struct sbitmap_queue *sbq;
  
  		list_del_init(&wait->entry);
222a5ae03   John Garry   blk-mq: Use point...
1082
  		sbq = hctx->tags->bitmap_tags;
e86185754   Jens Axboe   blk-mq: fix sbitm...
1083
1084
  		atomic_dec(&sbq->ws_active);
  	}
5815839b3   Ming Lei   blk-mq: introduce...
1085
  	spin_unlock(&hctx->dispatch_wait_lock);
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1086
1087
1088
  	blk_mq_run_hw_queue(hctx, true);
  	return 1;
  }
f906a6a0f   Jens Axboe   blk-mq: improve t...
1089
1090
  /*
   * Mark us waiting for a tag. For shared tags, this involves hooking us into
ee3e4de52   Bart Van Assche   blk-mq: Fix spell...
1091
1092
   * the tag wakeups. For non-shared tags, we can simply mark us needing a
   * restart. For both cases, take care to check the condition again after
f906a6a0f   Jens Axboe   blk-mq: improve t...
1093
1094
   * marking us as waiting.
   */
2278d69f0   Ming Lei   blk-mq: don't pas...
1095
  static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
f906a6a0f   Jens Axboe   blk-mq: improve t...
1096
  				 struct request *rq)
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1097
  {
222a5ae03   John Garry   blk-mq: Use point...
1098
  	struct sbitmap_queue *sbq = hctx->tags->bitmap_tags;
5815839b3   Ming Lei   blk-mq: introduce...
1099
  	struct wait_queue_head *wq;
f906a6a0f   Jens Axboe   blk-mq: improve t...
1100
1101
  	wait_queue_entry_t *wait;
  	bool ret;
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1102

51db1c37e   Ming Lei   blk-mq: Rename BL...
1103
  	if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
684b73245   Yufen Yu   blk-mq: use blk_m...
1104
  		blk_mq_sched_mark_restart_hctx(hctx);
f906a6a0f   Jens Axboe   blk-mq: improve t...
1105

c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1106
1107
1108
1109
1110
1111
1112
1113
  		/*
  		 * It's possible that a tag was freed in the window between the
  		 * allocation failure and adding the hardware queue to the wait
  		 * queue.
  		 *
  		 * Don't clear RESTART here, someone else could have set it.
  		 * At most this will cost an extra queue run.
  		 */
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1114
  		return blk_mq_get_driver_tag(rq);
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1115
  	}
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1116

2278d69f0   Ming Lei   blk-mq: don't pas...
1117
  	wait = &hctx->dispatch_wait;
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1118
1119
  	if (!list_empty_careful(&wait->entry))
  		return false;
e86185754   Jens Axboe   blk-mq: fix sbitm...
1120
  	wq = &bt_wait_ptr(sbq, hctx)->wait;
5815839b3   Ming Lei   blk-mq: introduce...
1121
1122
1123
  
  	spin_lock_irq(&wq->lock);
  	spin_lock(&hctx->dispatch_wait_lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1124
  	if (!list_empty(&wait->entry)) {
5815839b3   Ming Lei   blk-mq: introduce...
1125
1126
  		spin_unlock(&hctx->dispatch_wait_lock);
  		spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1127
  		return false;
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1128
  	}
e86185754   Jens Axboe   blk-mq: fix sbitm...
1129
  	atomic_inc(&sbq->ws_active);
5815839b3   Ming Lei   blk-mq: introduce...
1130
1131
  	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
  	__add_wait_queue(wq, wait);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1132

da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1133
  	/*
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1134
1135
1136
  	 * It's possible that a tag was freed in the window between the
  	 * allocation failure and adding the hardware queue to the wait
  	 * queue.
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1137
  	 */
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1138
  	ret = blk_mq_get_driver_tag(rq);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1139
  	if (!ret) {
5815839b3   Ming Lei   blk-mq: introduce...
1140
1141
  		spin_unlock(&hctx->dispatch_wait_lock);
  		spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1142
  		return false;
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1143
  	}
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1144
1145
1146
1147
1148
  
  	/*
  	 * We got a tag, remove ourselves from the wait queue to ensure
  	 * someone else gets the wakeup.
  	 */
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1149
  	list_del_init(&wait->entry);
e86185754   Jens Axboe   blk-mq: fix sbitm...
1150
  	atomic_dec(&sbq->ws_active);
5815839b3   Ming Lei   blk-mq: introduce...
1151
1152
  	spin_unlock(&hctx->dispatch_wait_lock);
  	spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1153
1154
  
  	return true;
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1155
  }
6e7687173   Ming Lei   blk-mq: dequeue r...
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
  #define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
  #define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
  /*
   * Update dispatch_busy with an Exponential Weighted Moving Average (EWMA):
   * - EWMA is a simple way to compute a running average value
   * - weights of 7/8 and 1/8 are applied so that the value decays exponentially
   * - a factor of 4 is used to avoid results that round down to 0 too early;
   *   the exact factor doesn't matter much since the EWMA decays exponentially
   */
  static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
  {
  	unsigned int ewma;
  
  	if (hctx->queue->elevator)
  		return;
  
  	ewma = hctx->dispatch_busy;
  
  	if (!ewma && !busy)
  		return;
  
  	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
  	if (busy)
  		ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
  	ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
  
  	hctx->dispatch_busy = ewma;
  }
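  /*
   * Worked example (illustrative, not part of the source): with
   * BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT == 8 and _FACTOR == 4, each busy update
   * adds 1 << 4 == 16 before dividing, i.e.
   *
   *	ewma = (ewma * 7 + (busy ? 16 : 0)) / 8
   *
   * Starting from 0 and seeing only busy updates, dispatch_busy moves
   * 0 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 and then saturates at 9 due to
   * integer division.  Seeing only non-busy updates from there, it decays
   * 9 -> 7 -> 6 -> 5 -> 4 -> 3 -> 2 -> 1 -> 0.
   */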
86ff7c2a8   Ming Lei   blk-mq: introduce...
1184
  #define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
c92a41031   Johannes Thumshirn   block: factor out...
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
  static void blk_mq_handle_dev_resource(struct request *rq,
  				       struct list_head *list)
  {
  	struct request *next =
  		list_first_entry_or_null(list, struct request, queuelist);
  
  	/*
  	 * If an I/O scheduler has been configured and we got a driver tag for
  	 * the next request already, free it.
  	 */
  	if (next)
  		blk_mq_put_driver_tag(next);
  
  	list_add(&rq->queuelist, list);
  	__blk_mq_requeue_request(rq);
  }
0512a75b9   Keith Busch   block: Introduce ...
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
  static void blk_mq_handle_zone_resource(struct request *rq,
  					struct list_head *zone_list)
  {
  	/*
  	 * If we end up here it is because we cannot dispatch a request to a
  	 * specific zone due to LLD level zone-write locking or other zone
  	 * related resource not being available. In this case, set the request
  	 * aside in zone_list for retrying it later.
  	 */
  	list_add(&rq->queuelist, zone_list);
  	__blk_mq_requeue_request(rq);
  }
753835245   Ming Lei   blk-mq: move gett...
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
  enum prep_dispatch {
  	PREP_DISPATCH_OK,
  	PREP_DISPATCH_NO_TAG,
  	PREP_DISPATCH_NO_BUDGET,
  };
  
  static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
  						  bool need_budget)
  {
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  
  	if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
  		blk_mq_put_driver_tag(rq);
  		return PREP_DISPATCH_NO_BUDGET;
  	}
  
  	if (!blk_mq_get_driver_tag(rq)) {
  		/*
  		 * The initial allocation attempt failed, so we need to
  		 * rerun the hardware queue when a tag is freed. The
  		 * waitqueue takes care of that. If the queue is run
  		 * before we add this entry back on the dispatch list,
  		 * we'll re-run it below.
  		 */
  		if (!blk_mq_mark_tag_wait(hctx, rq)) {
1fd40b5ea   Ming Lei   blk-mq: pass obta...
1238
1239
1240
1241
1242
1243
  			/*
  			 * Any budgets not obtained from this function will be
  			 * released together when the partial dispatch is handled.
  			 */
  			if (need_budget)
  				blk_mq_put_dispatch_budget(rq->q);
753835245   Ming Lei   blk-mq: move gett...
1244
1245
1246
1247
1248
1249
  			return PREP_DISPATCH_NO_TAG;
  		}
  	}
  
  	return PREP_DISPATCH_OK;
  }
1fd40b5ea   Ming Lei   blk-mq: pass obta...
1250
1251
1252
1253
1254
1255
1256
1257
1258
  /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
  static void blk_mq_release_budgets(struct request_queue *q,
  		unsigned int nr_budgets)
  {
  	int i;
  
  	for (i = 0; i < nr_budgets; i++)
  		blk_mq_put_dispatch_budget(q);
  }
1f57f8d44   Jens Axboe   blk-mq: don't que...
1259
1260
1261
  /*
   * Returns true if we did some work AND can potentially do more.
   */
445874e89   Ming Lei   blk-mq: pass hctx...
1262
  bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
1fd40b5ea   Ming Lei   blk-mq: pass obta...
1263
  			     unsigned int nr_budgets)
320ae51fe   Jens Axboe   blk-mq: new multi...
1264
  {
753835245   Ming Lei   blk-mq: move gett...
1265
  	enum prep_dispatch prep;
445874e89   Ming Lei   blk-mq: pass hctx...
1266
  	struct request_queue *q = hctx->queue;
6d6f167ce   Jianchao Wang   blk-mq: put the d...
1267
  	struct request *rq, *nxt;
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1268
  	int errors, queued;
86ff7c2a8   Ming Lei   blk-mq: introduce...
1269
  	blk_status_t ret = BLK_STS_OK;
0512a75b9   Keith Busch   block: Introduce ...
1270
  	LIST_HEAD(zone_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1271

81380ca10   Omar Sandoval   blk-mq: use the r...
1272
1273
  	if (list_empty(list))
  		return false;
320ae51fe   Jens Axboe   blk-mq: new multi...
1274
  	/*
320ae51fe   Jens Axboe   blk-mq: new multi...
1275
1276
  	 * Now process all the entries, sending them to the driver.
  	 */
93efe9817   Jens Axboe   blk-mq: include e...
1277
  	errors = queued = 0;
81380ca10   Omar Sandoval   blk-mq: use the r...
1278
  	do {
74c450521   Jens Axboe   blk-mq: add a 'li...
1279
  		struct blk_mq_queue_data bd;
320ae51fe   Jens Axboe   blk-mq: new multi...
1280

f04c3df3e   Jens Axboe   blk-mq: abstract ...
1281
  		rq = list_first_entry(list, struct request, queuelist);
0bca799b9   Ming Lei   blk-mq: order get...
1282

445874e89   Ming Lei   blk-mq: pass hctx...
1283
  		WARN_ON_ONCE(hctx != rq->mq_hctx);
1fd40b5ea   Ming Lei   blk-mq: pass obta...
1284
  		prep = blk_mq_prep_dispatch_rq(rq, !nr_budgets);
753835245   Ming Lei   blk-mq: move gett...
1285
  		if (prep != PREP_DISPATCH_OK)
0bca799b9   Ming Lei   blk-mq: order get...
1286
  			break;
de1482974   Ming Lei   blk-mq: introduce...
1287

320ae51fe   Jens Axboe   blk-mq: new multi...
1288
  		list_del_init(&rq->queuelist);
320ae51fe   Jens Axboe   blk-mq: new multi...
1289

74c450521   Jens Axboe   blk-mq: add a 'li...
1290
  		bd.rq = rq;
113285b47   Jens Axboe   blk-mq: ensure th...
1291
1292
1293
1294
1295
1296
1297
1298
  
  		/*
  		 * Flag last if we have no more requests, or if we have more
  		 * but can't assign a driver tag to it.
  		 */
  		if (list_empty(list))
  			bd.last = true;
  		else {
113285b47   Jens Axboe   blk-mq: ensure th...
1299
  			nxt = list_first_entry(list, struct request, queuelist);
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1300
  			bd.last = !blk_mq_get_driver_tag(nxt);
113285b47   Jens Axboe   blk-mq: ensure th...
1301
  		}
74c450521   Jens Axboe   blk-mq: add a 'li...
1302

1fd40b5ea   Ming Lei   blk-mq: pass obta...
1303
1304
1305
1306
1307
1308
  		/*
  		 * Once the request is queued to the LLD, there is no need to
  		 * cover its budget any more.
  		 */
  		if (nr_budgets)
  			nr_budgets--;
74c450521   Jens Axboe   blk-mq: add a 'li...
1309
  		ret = q->mq_ops->queue_rq(hctx, &bd);
7bf137298   Ming Lei   blk-mq: streamlin...
1310
1311
1312
  		switch (ret) {
  		case BLK_STS_OK:
  			queued++;
320ae51fe   Jens Axboe   blk-mq: new multi...
1313
  			break;
7bf137298   Ming Lei   blk-mq: streamlin...
1314
1315
1316
1317
1318
  		case BLK_STS_RESOURCE:
  		case BLK_STS_DEV_RESOURCE:
  			blk_mq_handle_dev_resource(rq, list);
  			goto out;
  		case BLK_STS_ZONE_RESOURCE:
0512a75b9   Keith Busch   block: Introduce ...
1319
1320
1321
1322
1323
1324
  			/*
  			 * Move the request to zone_list and keep going through
  			 * the dispatch list to find more requests the drive can
  			 * accept.
  			 */
  			blk_mq_handle_zone_resource(rq, &zone_list);
7bf137298   Ming Lei   blk-mq: streamlin...
1325
1326
  			break;
  		default:
93efe9817   Jens Axboe   blk-mq: include e...
1327
  			errors++;
2a842acab   Christoph Hellwig   block: introduce ...
1328
  			blk_mq_end_request(rq, BLK_STS_IOERR);
320ae51fe   Jens Axboe   blk-mq: new multi...
1329
  		}
81380ca10   Omar Sandoval   blk-mq: use the r...
1330
  	} while (!list_empty(list));
7bf137298   Ming Lei   blk-mq: streamlin...
1331
  out:
0512a75b9   Keith Busch   block: Introduce ...
1332
1333
  	if (!list_empty(&zone_list))
  		list_splice_tail_init(&zone_list, list);
703fd1c0f   Jens Axboe   blk-mq: account h...
1334
  	hctx->dispatched[queued_to_index(queued)]++;
320ae51fe   Jens Axboe   blk-mq: new multi...
1335

632bfb632   yangerkun   blk-mq: call comm...
1336
1337
1338
1339
1340
  	/* If we didn't flush the entire list, we could have told the driver
  	 * there was more coming, but that turned out to be a lie.
  	 */
  	if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
  		q->mq_ops->commit_rqs(hctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1341
1342
1343
1344
  	/*
  	 * Any items that need requeuing? Stuff them into hctx->dispatch,
  	 * that is where we will continue on next queue run.
  	 */
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1345
  	if (!list_empty(list)) {
86ff7c2a8   Ming Lei   blk-mq: introduce...
1346
  		bool needs_restart;
753835245   Ming Lei   blk-mq: move gett...
1347
1348
  		/* For non-shared tags, the RESTART check will suffice */
  		bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
51db1c37e   Ming Lei   blk-mq: Rename BL...
1349
  			(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
753835245   Ming Lei   blk-mq: move gett...
1350
  		bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
86ff7c2a8   Ming Lei   blk-mq: introduce...
1351

1fd40b5ea   Ming Lei   blk-mq: pass obta...
1352
  		blk_mq_release_budgets(q, nr_budgets);
86ff7c2a8   Ming Lei   blk-mq: introduce...
1353

320ae51fe   Jens Axboe   blk-mq: new multi...
1354
  		spin_lock(&hctx->lock);
01e99aeca   Ming Lei   blk-mq: insert pa...
1355
  		list_splice_tail_init(list, &hctx->dispatch);
320ae51fe   Jens Axboe   blk-mq: new multi...
1356
  		spin_unlock(&hctx->lock);
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1357

9ba52e581   Shaohua Li   blk-mq: don't los...
1358
  		/*
d7d8535f3   Ming Lei   blk-mq: order add...
1359
1360
1361
1362
1363
1364
1365
1366
1367
  		 * Order adding requests to hctx->dispatch and checking
  		 * SCHED_RESTART flag. The pair of this smp_mb() is the one
  		 * in blk_mq_sched_restart(). This prevents the restart code
  		 * path from missing requests newly added to hctx->dispatch
  		 * while SCHED_RESTART is observed here.
  		 */
  		smp_mb();
  
  		/*
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1368
1369
1370
  		 * If SCHED_RESTART was set by the caller of this function and
  		 * it is no longer set that means that it was cleared by another
  		 * thread and hence that a queue rerun is needed.
9ba52e581   Shaohua Li   blk-mq: don't los...
1371
  		 *
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1372
1373
1374
1375
  		 * If 'no_tag' is set, that means that we failed getting
  		 * a driver tag with an I/O scheduler attached. If our dispatch
  		 * waitqueue is no longer active, ensure that we run the queue
  		 * AFTER adding our entries back to the list.
bd166ef18   Jens Axboe   blk-mq-sched: add...
1376
  		 *
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1377
1378
1379
1380
1381
1382
1383
  		 * If no I/O scheduler has been configured it is possible that
  		 * the hardware queue got stopped and restarted before requests
  		 * were pushed back onto the dispatch list. Rerun the queue to
  		 * avoid starvation. Notes:
  		 * - blk_mq_run_hw_queue() checks whether or not a queue has
  		 *   been stopped before rerunning a queue.
  		 * - Some but not all block drivers stop a queue before
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1384
  		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1385
  		 *   and dm-rq.
86ff7c2a8   Ming Lei   blk-mq: introduce...
1386
1387
1388
  		 *
  		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
  		 * bit is set, run queue after a delay to avoid IO stalls
ab3cee376   Douglas Anderson   blk-mq: In blk_mq...
1389
1390
  		 * that could otherwise occur if the queue is idle.  We'll do
  		 * the same if we couldn't get budget and SCHED_RESTART is set.
bd166ef18   Jens Axboe   blk-mq-sched: add...
1391
  		 */
86ff7c2a8   Ming Lei   blk-mq: introduce...
1392
1393
  		needs_restart = blk_mq_sched_needs_restart(hctx);
  		if (!needs_restart ||
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1394
  		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1395
  			blk_mq_run_hw_queue(hctx, true);
ab3cee376   Douglas Anderson   blk-mq: In blk_mq...
1396
1397
  		else if (needs_restart && (ret == BLK_STS_RESOURCE ||
  					   no_budget_avail))
86ff7c2a8   Ming Lei   blk-mq: introduce...
1398
  			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
1f57f8d44   Jens Axboe   blk-mq: don't que...
1399

6e7687173   Ming Lei   blk-mq: dequeue r...
1400
  		blk_mq_update_dispatch_busy(hctx, true);
1f57f8d44   Jens Axboe   blk-mq: don't que...
1401
  		return false;
6e7687173   Ming Lei   blk-mq: dequeue r...
1402
1403
  	} else
  		blk_mq_update_dispatch_busy(hctx, false);
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1404

93efe9817   Jens Axboe   blk-mq: include e...
1405
  	return (queued + errors) != 0;
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1406
  }
105663f73   André Almeida   blk-mq: Document ...
1407
1408
1409
1410
1411
1412
  /**
   * __blk_mq_run_hw_queue - Run a hardware queue.
   * @hctx: Pointer to the hardware queue to run.
   *
   * Send pending requests to the hardware.
   */
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1413
1414
1415
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	int srcu_idx;
b7a71e66d   Jens Axboe   blk-mq: add warni...
1416
1417
1418
  	/*
  	 * We should be running this queue from one of the CPUs that
  	 * are mapped to it.
7df938fbc   Ming Lei   blk-mq: turn WARN...
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
  	 *
  	 * There are at least two related races now between setting
  	 * hctx->next_cpu from blk_mq_hctx_next_cpu() and running
  	 * __blk_mq_run_hw_queue():
  	 *
  	 * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(),
  	 *   but later it becomes online, in which case this warning
  	 *   is harmless
  	 *
  	 * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(),
  	 *   but later it becomes offline, then the warning can't be
  	 *   triggered, and we depend on the blk-mq timeout handler to
  	 *   handle dispatched requests to this hctx
b7a71e66d   Jens Axboe   blk-mq: add warni...
1432
  	 */
7df938fbc   Ming Lei   blk-mq: turn WARN...
1433
1434
1435
1436
1437
1438
1439
1440
  	if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
  		cpu_online(hctx->next_cpu)) {
  		printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s
  ",
  			raw_smp_processor_id(),
  			cpumask_empty(hctx->cpumask) ? "inactive": "active");
  		dump_stack();
  	}
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1441

b7a71e66d   Jens Axboe   blk-mq: add warni...
1442
1443
1444
1445
1446
  	/*
  	 * We can't run the queue inline with ints disabled. Ensure that
  	 * we catch bad users of this early.
  	 */
  	WARN_ON_ONCE(in_interrupt());
04ced159c   Jens Axboe   blk-mq: move hctx...
1447
  	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
bf4907c05   Jens Axboe   blk-mq: fix sched...
1448

04ced159c   Jens Axboe   blk-mq: move hctx...
1449
1450
1451
  	hctx_lock(hctx, &srcu_idx);
  	blk_mq_sched_dispatch_requests(hctx);
  	hctx_unlock(hctx, srcu_idx);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1452
  }
f82ddf192   Ming Lei   blk-mq: introduce...
1453
1454
1455
1456
1457
1458
1459
1460
  static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
  {
  	int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
  
  	if (cpu >= nr_cpu_ids)
  		cpu = cpumask_first(hctx->cpumask);
  	return cpu;
  }
506e931f9   Jens Axboe   blk-mq: add basic...
1461
1462
1463
1464
1465
1466
1467
1468
  /*
   * It'd be great if the workqueue API had a way to pass
   * in a mask and had some smarts for more clever placement.
   * For now we just round-robin here, switching for every
   * BLK_MQ_CPU_WORK_BATCH queued items.
   */
  static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
  {
7bed45954   Ming Lei   blk-mq: make sure...
1469
  	bool tried = false;
476f8c98a   Ming Lei   blk-mq: avoid to ...
1470
  	int next_cpu = hctx->next_cpu;
7bed45954   Ming Lei   blk-mq: make sure...
1471

b657d7e63   Christoph Hellwig   blk-mq: handle th...
1472
1473
  	if (hctx->queue->nr_hw_queues == 1)
  		return WORK_CPU_UNBOUND;
506e931f9   Jens Axboe   blk-mq: add basic...
1474
1475
  
  	if (--hctx->next_cpu_batch <= 0) {
7bed45954   Ming Lei   blk-mq: make sure...
1476
  select_cpu:
476f8c98a   Ming Lei   blk-mq: avoid to ...
1477
  		next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
20e4d8139   Christoph Hellwig   blk-mq: simplify ...
1478
  				cpu_online_mask);
506e931f9   Jens Axboe   blk-mq: add basic...
1479
  		if (next_cpu >= nr_cpu_ids)
f82ddf192   Ming Lei   blk-mq: introduce...
1480
  			next_cpu = blk_mq_first_mapped_cpu(hctx);
506e931f9   Jens Axboe   blk-mq: add basic...
1481
1482
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
7bed45954   Ming Lei   blk-mq: make sure...
1483
1484
1485
1486
  	/*
  	 * Do unbound schedule if we can't find an online CPU for this hctx;
  	 * this should only happen in the path of handling CPU DEAD.
  	 */
476f8c98a   Ming Lei   blk-mq: avoid to ...
1487
  	if (!cpu_online(next_cpu)) {
7bed45954   Ming Lei   blk-mq: make sure...
1488
1489
1490
1491
1492
1493
1494
1495
1496
  		if (!tried) {
  			tried = true;
  			goto select_cpu;
  		}
  
  		/*
  		 * Make sure to re-select a CPU next time, once the CPUs
  		 * in hctx->cpumask become online again.
  		 */
476f8c98a   Ming Lei   blk-mq: avoid to ...
1497
  		hctx->next_cpu = next_cpu;
7bed45954   Ming Lei   blk-mq: make sure...
1498
1499
1500
  		hctx->next_cpu_batch = 1;
  		return WORK_CPU_UNBOUND;
  	}
476f8c98a   Ming Lei   blk-mq: avoid to ...
1501
1502
1503
  
  	hctx->next_cpu = next_cpu;
  	return next_cpu;
506e931f9   Jens Axboe   blk-mq: add basic...
1504
  }
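  /*
   * Illustrative walk-through (not from the source; assumes more than one
   * hardware queue, otherwise WORK_CPU_UNBOUND is always returned, and
   * BLK_MQ_CPU_WORK_BATCH == 8): with hctx->cpumask containing CPUs {2, 5},
   * both online, run_work is queued on CPU 2 for 8 consecutive runs, then on
   * CPU 5 for the next 8, then back on CPU 2, and so on.  If the selected CPU
   * went offline, the search is retried once and, failing that, the work is
   * queued as WORK_CPU_UNBOUND.
   */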
105663f73   André Almeida   blk-mq: Document ...
1505
1506
1507
1508
1509
1510
1511
1512
1513
  /**
   * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue.
   * @hctx: Pointer to the hardware queue to run.
   * @async: If we want to run the queue asynchronously.
   * @msecs: Milliseconds of delay to wait before running the queue.
   *
   * If !@async, try to run the queue now. Else, run the queue asynchronously
   * with a delay of @msecs.
   */
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1514
1515
  static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
  					unsigned long msecs)
320ae51fe   Jens Axboe   blk-mq: new multi...
1516
  {
5435c023b   Bart Van Assche   blk-mq: Warn when...
1517
  	if (unlikely(blk_mq_hctx_stopped(hctx)))
320ae51fe   Jens Axboe   blk-mq: new multi...
1518
  		return;
1b792f2f9   Jens Axboe   blk-mq: add flag ...
1519
  	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1520
1521
  		int cpu = get_cpu();
  		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
398205b83   Paolo Bonzini   blk_mq: call pree...
1522
  			__blk_mq_run_hw_queue(hctx);
2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1523
  			put_cpu();
398205b83   Paolo Bonzini   blk_mq: call pree...
1524
1525
  			return;
  		}
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1526

2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1527
  		put_cpu();
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1528
  	}
398205b83   Paolo Bonzini   blk_mq: call pree...
1529

ae943d206   Bart Van Assche   blk-mq: Avoid tha...
1530
1531
  	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
  				    msecs_to_jiffies(msecs));
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1532
  }
105663f73   André Almeida   blk-mq: Document ...
1533
1534
1535
1536
1537
1538
1539
  /**
   * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously.
   * @hctx: Pointer to the hardware queue to run.
   * @msecs: Milliseconds of delay to wait before running the queue.
   *
   * Run a hardware queue asynchronously with a delay of @msecs.
   */
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1540
1541
1542
1543
1544
  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
  {
  	__blk_mq_delay_run_hw_queue(hctx, true, msecs);
  }
  EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
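  /*
   * Illustrative sketch (not part of blk-mq): a driver's ->queue_rq() that is
   * temporarily out of an internal resource can ask for a delayed re-run of
   * the hardware queue instead of busy-looping.  example_device_busy() is a
   * hypothetical driver helper.
   */
  static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
  				     const struct blk_mq_queue_data *bd)
  {
  	/* example_device_busy() is hypothetical, standing in for a real check */
  	if (example_device_busy(hctx->driver_data)) {
  		/* let blk-mq retry this hctx after roughly 3 milliseconds */
  		blk_mq_delay_run_hw_queue(hctx, 3);
  		return BLK_STS_RESOURCE;
  	}
  	/* ... hand bd->rq to the hardware here ... */
  	return BLK_STS_OK;
  }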
105663f73   André Almeida   blk-mq: Document ...
1545
1546
1547
1548
1549
1550
1551
1552
1553
  /**
   * blk_mq_run_hw_queue - Start to run a hardware queue.
   * @hctx: Pointer to the hardware queue to run.
   * @async: If we want to run the queue asynchronously.
   *
   * Check if the request queue is not in a quiesced state and if there are
   * pending requests to be sent. If this is true, run the queue to send requests
   * to hardware.
   */
626fb735a   John Garry   blk-mq: Make blk_...
1554
  void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1555
  {
24f5a90f0   Ming Lei   blk-mq: quiesce q...
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
  	int srcu_idx;
  	bool need_run;
  
  	/*
  	 * When the queue is quiesced we may be switching the io scheduler,
  	 * updating nr_hw_queues, or doing other work, and we can't run the
  	 * queue any more; even __blk_mq_hctx_has_pending() can't be called
  	 * safely.
  	 *
  	 * The queue will be rerun in blk_mq_unquiesce_queue() if it is
  	 * quiesced.
  	 */
04ced159c   Jens Axboe   blk-mq: move hctx...
1567
1568
1569
1570
  	hctx_lock(hctx, &srcu_idx);
  	need_run = !blk_queue_quiesced(hctx->queue) &&
  		blk_mq_hctx_has_pending(hctx);
  	hctx_unlock(hctx, srcu_idx);
24f5a90f0   Ming Lei   blk-mq: quiesce q...
1571

626fb735a   John Garry   blk-mq: Make blk_...
1572
  	if (need_run)
79f720a75   Jens Axboe   blk-mq: only run ...
1573
  		__blk_mq_delay_run_hw_queue(hctx, async, 0);
320ae51fe   Jens Axboe   blk-mq: new multi...
1574
  }
5b7272729   Omar Sandoval   blk-mq: export he...
1575
  EXPORT_SYMBOL(blk_mq_run_hw_queue);
320ae51fe   Jens Axboe   blk-mq: new multi...
1576

105663f73   André Almeida   blk-mq: Document ...
1577
  /**
24f7bb886   Mauro Carvalho Chehab   block: blk-mq: fi...
1578
   * blk_mq_run_hw_queues - Run all hardware queues in a request queue.
105663f73   André Almeida   blk-mq: Document ...
1579
1580
1581
   * @q: Pointer to the request queue to run.
   * @async: If we want to run the queue asynchronously.
   */
b94ec2964   Mike Snitzer   blk-mq: export bl...
1582
  void blk_mq_run_hw_queues(struct request_queue *q, bool async)
320ae51fe   Jens Axboe   blk-mq: new multi...
1583
1584
1585
1586
1587
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
79f720a75   Jens Axboe   blk-mq: only run ...
1588
  		if (blk_mq_hctx_stopped(hctx))
320ae51fe   Jens Axboe   blk-mq: new multi...
1589
  			continue;
b94ec2964   Mike Snitzer   blk-mq: export bl...
1590
  		blk_mq_run_hw_queue(hctx, async);
320ae51fe   Jens Axboe   blk-mq: new multi...
1591
1592
  	}
  }
b94ec2964   Mike Snitzer   blk-mq: export bl...
1593
  EXPORT_SYMBOL(blk_mq_run_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1594

fd0014430   Bart Van Assche   blk-mq: Introduce...
1595
  /**
b9151e7bc   Douglas Anderson   blk-mq: Add blk_m...
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
   * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously.
   * @q: Pointer to the request queue to run.
   * @msecs: Milliseconds of delay to wait before running the queues.
   */
  void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (blk_mq_hctx_stopped(hctx))
  			continue;
  
  		blk_mq_delay_run_hw_queue(hctx, msecs);
  	}
  }
  EXPORT_SYMBOL(blk_mq_delay_run_hw_queues);
  
  /**
fd0014430   Bart Van Assche   blk-mq: Introduce...
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
   * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
   * @q: request queue.
   *
   * The caller is responsible for serializing this function against
   * blk_mq_{start,stop}_hw_queue().
   */
  bool blk_mq_queue_stopped(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		if (blk_mq_hctx_stopped(hctx))
  			return true;
  
  	return false;
  }
  EXPORT_SYMBOL(blk_mq_queue_stopped);
39a70c76b   Ming Lei   blk-mq: clarify d...
1633
1634
1635
  /*
   * This function is often used by a driver to pause .queue_rq() when
   * there aren't enough resources or some condition isn't satisfied, in which case
4d6062193   Bart Van Assche   block: Fix two co...
1636
   * BLK_STS_RESOURCE is usually returned.
39a70c76b   Ming Lei   blk-mq: clarify d...
1637
1638
1639
1640
1641
   *
   * We do not guarantee that dispatch can be drained or blocked
   * after blk_mq_stop_hw_queue() returns. Please use
   * blk_mq_quiesce_queue() for that requirement.
   */
2719aa217   Jens Axboe   blk-mq: don't use...
1642
1643
  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
641a9ed60   Ming Lei   Revert "blk-mq: d...
1644
  	cancel_delayed_work(&hctx->run_work);
280d45f6c   Christoph Hellwig   blk-mq: add blk_m...
1645

641a9ed60   Ming Lei   Revert "blk-mq: d...
1646
  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
2719aa217   Jens Axboe   blk-mq: don't use...
1647
  }
641a9ed60   Ming Lei   Revert "blk-mq: d...
1648
  EXPORT_SYMBOL(blk_mq_stop_hw_queue);
2719aa217   Jens Axboe   blk-mq: don't use...
1649

39a70c76b   Ming Lei   blk-mq: clarify d...
1650
1651
1652
  /*
   * This function is often used by a driver to pause .queue_rq() when
   * there aren't enough resources or some condition isn't satisfied, in which case
4d6062193   Bart Van Assche   block: Fix two co...
1653
   * BLK_STS_RESOURCE is usually returned.
39a70c76b   Ming Lei   blk-mq: clarify d...
1654
1655
1656
1657
1658
   *
   * We do not guarantee that dispatch can be drained or blocked
   * after blk_mq_stop_hw_queues() returns. Please use
   * blk_mq_quiesce_queue() for that requirement.
   */
2719aa217   Jens Axboe   blk-mq: don't use...
1659
1660
  void blk_mq_stop_hw_queues(struct request_queue *q)
  {
641a9ed60   Ming Lei   Revert "blk-mq: d...
1661
1662
1663
1664
1665
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_stop_hw_queue(hctx);
280d45f6c   Christoph Hellwig   blk-mq: add blk_m...
1666
1667
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1668
1669
1670
  void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1671

0ffbce80c   Jens Axboe   blk-mq: blk_mq_st...
1672
  	blk_mq_run_hw_queue(hctx, false);
320ae51fe   Jens Axboe   blk-mq: new multi...
1673
1674
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queue);
2f2685565   Christoph Hellwig   blk-mq: add blk_m...
1675
1676
1677
1678
1679
1680
1681
1682
1683
  void blk_mq_start_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queues);
ae911c5e7   Jens Axboe   blk-mq: add blk_m...
1684
1685
1686
1687
1688
1689
1690
1691
1692
  void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  {
  	if (!blk_mq_hctx_stopped(hctx))
  		return;
  
  	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
  	blk_mq_run_hw_queue(hctx, async);
  }
  EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
1b4a32585   Christoph Hellwig   blk-mq: add async...
1693
  void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
320ae51fe   Jens Axboe   blk-mq: new multi...
1694
1695
1696
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
ae911c5e7   Jens Axboe   blk-mq: add blk_m...
1697
1698
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_stopped_hw_queue(hctx, async);
320ae51fe   Jens Axboe   blk-mq: new multi...
1699
1700
  }
  EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
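  /*
   * Illustrative sketch (not part of blk-mq) of the stop/start pattern the
   * comments above describe: ->queue_rq() stops the hardware queue when the
   * device can accept no more commands, and the completion path restarts any
   * stopped queues once room frees up.  example_can_queue() is a hypothetical
   * driver helper.
   */
  static blk_status_t example_queue_rq_stopping(struct blk_mq_hw_ctx *hctx,
  					      const struct blk_mq_queue_data *bd)
  {
  	/* example_can_queue() is hypothetical, standing in for a real check */
  	if (!example_can_queue(hctx->driver_data)) {
  		blk_mq_stop_hw_queue(hctx);
  		return BLK_STS_RESOURCE;
  	}
  	/* ... issue bd->rq to the hardware here ... */
  	return BLK_STS_OK;
  }

  static void example_complete_rq(struct request *rq)
  {
  	blk_mq_end_request(rq, BLK_STS_OK);
  	/* a command slot was freed: restart stopped queues asynchronously */
  	blk_mq_start_stopped_hw_queues(rq->q, true);
  }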
70f4db639   Christoph Hellwig   blk-mq: add blk_m...
1701
  static void blk_mq_run_work_fn(struct work_struct *work)
320ae51fe   Jens Axboe   blk-mq: new multi...
1702
1703
  {
  	struct blk_mq_hw_ctx *hctx;
9f9937379   Jens Axboe   blk-mq: unify hct...
1704
  	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
320ae51fe   Jens Axboe   blk-mq: new multi...
1705

21c6e939a   Jens Axboe   blk-mq: unify hct...
1706
  	/*
15fe8a90b   Ming Lei   blk-mq: remove bl...
1707
  	 * If we are stopped, don't run the queue.
21c6e939a   Jens Axboe   blk-mq: unify hct...
1708
  	 */
0841031ab   Yufen Yu   blk-mq: use helpe...
1709
  	if (blk_mq_hctx_stopped(hctx))
0196d6b40   Jianchao Wang   blk-mq: return wh...
1710
  		return;
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1711
1712
1713
  
  	__blk_mq_run_hw_queue(hctx);
  }
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1714
  static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1715
1716
  					    struct request *rq,
  					    bool at_head)
320ae51fe   Jens Axboe   blk-mq: new multi...
1717
  {
e57690fe0   Jens Axboe   blk-mq: don't ove...
1718
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1719
  	enum hctx_type type = hctx->type;
e57690fe0   Jens Axboe   blk-mq: don't ove...
1720

7b6078146   Bart Van Assche   blk-mq: Document ...
1721
  	lockdep_assert_held(&ctx->lock);
01b983c9f   Jens Axboe   blk-mq: add blktr...
1722
  	trace_block_rq_insert(hctx->queue, rq);
72a0a36e2   Christoph Hellwig   blk-mq: support a...
1723
  	if (at_head)
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1724
  		list_add(&rq->queuelist, &ctx->rq_lists[type]);
72a0a36e2   Christoph Hellwig   blk-mq: support a...
1725
  	else
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1726
  		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1727
  }
4bb659b15   Jens Axboe   blk-mq: implement...
1728

2c3ad6679   Jens Axboe   blk-mq: export so...
1729
1730
  void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
  			     bool at_head)
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1731
1732
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
7b6078146   Bart Van Assche   blk-mq: Document ...
1733
  	lockdep_assert_held(&ctx->lock);
e57690fe0   Jens Axboe   blk-mq: don't ove...
1734
  	__blk_mq_insert_req_list(hctx, rq, at_head);
320ae51fe   Jens Axboe   blk-mq: new multi...
1735
  	blk_mq_hctx_mark_pending(hctx, ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1736
  }
105663f73   André Almeida   blk-mq: Document ...
1737
1738
1739
  /**
   * blk_mq_request_bypass_insert - Insert a request at dispatch list.
   * @rq: Pointer to request to be inserted.
26bfeb266   Randy Dunlap   block: blk-mq.c: ...
1740
   * @at_head: true if the request should be inserted at the head of the list.
105663f73   André Almeida   blk-mq: Document ...
1741
1742
   * @run_queue: If we should run the hardware queue after inserting the request.
   *
157f377be   Jens Axboe   block: directly i...
1743
1744
1745
   * Should only be used carefully, when the caller knows we want to
   * bypass a potential IO scheduler on the target device.
   */
01e99aeca   Ming Lei   blk-mq: insert pa...
1746
1747
  void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
  				  bool run_queue)
157f377be   Jens Axboe   block: directly i...
1748
  {
ea4f995ee   Jens Axboe   blk-mq: cache req...
1749
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
157f377be   Jens Axboe   block: directly i...
1750
1751
  
  	spin_lock(&hctx->lock);
01e99aeca   Ming Lei   blk-mq: insert pa...
1752
1753
1754
1755
  	if (at_head)
  		list_add(&rq->queuelist, &hctx->dispatch);
  	else
  		list_add_tail(&rq->queuelist, &hctx->dispatch);
157f377be   Jens Axboe   block: directly i...
1756
  	spin_unlock(&hctx->lock);
b0850297c   Ming Lei   block: pass 'run_...
1757
1758
  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, false);
157f377be   Jens Axboe   block: directly i...
1759
  }
bd166ef18   Jens Axboe   blk-mq-sched: add...
1760
1761
  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
  			    struct list_head *list)
320ae51fe   Jens Axboe   blk-mq: new multi...
1762
1763
  
  {
3f0cedc7e   Ming Lei   blk-mq: use list_...
1764
  	struct request *rq;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1765
  	enum hctx_type type = hctx->type;
3f0cedc7e   Ming Lei   blk-mq: use list_...
1766

320ae51fe   Jens Axboe   blk-mq: new multi...
1767
1768
1769
1770
  	/*
  	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
  	 * offline now
  	 */
3f0cedc7e   Ming Lei   blk-mq: use list_...
1771
  	list_for_each_entry(rq, list, queuelist) {
e57690fe0   Jens Axboe   blk-mq: don't ove...
1772
  		BUG_ON(rq->mq_ctx != ctx);
3f0cedc7e   Ming Lei   blk-mq: use list_...
1773
  		trace_block_rq_insert(hctx->queue, rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
1774
  	}
3f0cedc7e   Ming Lei   blk-mq: use list_...
1775
1776
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1777
  	list_splice_tail_init(list, &ctx->rq_lists[type]);
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1778
  	blk_mq_hctx_mark_pending(hctx, ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1779
  	spin_unlock(&ctx->lock);
320ae51fe   Jens Axboe   blk-mq: new multi...
1780
  }
3110fc796   Jens Axboe   blk-mq: improve p...
1781
  static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
320ae51fe   Jens Axboe   blk-mq: new multi...
1782
1783
1784
  {
  	struct request *rqa = container_of(a, struct request, queuelist);
  	struct request *rqb = container_of(b, struct request, queuelist);
7d30a6210   Pavel Begunkov   blk-mq: optimise ...
1785
1786
1787
1788
  	if (rqa->mq_ctx != rqb->mq_ctx)
  		return rqa->mq_ctx > rqb->mq_ctx;
  	if (rqa->mq_hctx != rqb->mq_hctx)
  		return rqa->mq_hctx > rqb->mq_hctx;
3110fc796   Jens Axboe   blk-mq: improve p...
1789
1790
  
  	return blk_rq_pos(rqa) > blk_rq_pos(rqb);
320ae51fe   Jens Axboe   blk-mq: new multi...
1791
1792
1793
1794
  }
  
  void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  {
320ae51fe   Jens Axboe   blk-mq: new multi...
1795
  	LIST_HEAD(list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1796

95ed0c5b1   Pavel Begunkov   blk-mq: optimise ...
1797
1798
  	if (list_empty(&plug->mq_list))
  		return;
320ae51fe   Jens Axboe   blk-mq: new multi...
1799
  	list_splice_init(&plug->mq_list, &list);
ce5b009cf   Jens Axboe   block: improve lo...
1800
1801
  	if (plug->rq_count > 2 && plug->multiple_queues)
  		list_sort(NULL, &list, plug_rq_cmp);
320ae51fe   Jens Axboe   blk-mq: new multi...
1802

bcc816dfe   Dongli Zhang   blk-mq: do not re...
1803
  	plug->rq_count = 0;
95ed0c5b1   Pavel Begunkov   blk-mq: optimise ...
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
  	do {
  		struct list_head rq_list;
  		struct request *rq, *head_rq = list_entry_rq(list.next);
  		struct list_head *pos = &head_rq->queuelist; /* skip first */
  		struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
  		struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
  		unsigned int depth = 1;
  
  		list_for_each_continue(pos, &list) {
  			rq = list_entry_rq(pos);
  			BUG_ON(!rq->q);
  			if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
  				break;
  			depth++;
320ae51fe   Jens Axboe   blk-mq: new multi...
1818
  		}
95ed0c5b1   Pavel Begunkov   blk-mq: optimise ...
1819
1820
  		list_cut_before(&rq_list, &list, pos);
  		trace_block_unplug(head_rq->q, depth, !from_schedule);
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1821
  		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
bd166ef18   Jens Axboe   blk-mq-sched: add...
1822
  						from_schedule);
95ed0c5b1   Pavel Begunkov   blk-mq: optimise ...
1823
  	} while(!list_empty(&list));
320ae51fe   Jens Axboe   blk-mq: new multi...
1824
  }
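  /*
   * Illustrative walk-through (not from the source): after sorting, a plug
   * list such as
   *
   *	rqA(hctx0,ctx0) -> rqB(hctx0,ctx0) -> rqC(hctx0,ctx1) -> rqD(hctx1,ctx2)
   *
   * is cut into three chunks, [rqA, rqB], [rqC] and [rqD], and each chunk is
   * handed to blk_mq_sched_insert_requests() for its (hctx, ctx) pair.
   */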
14ccb66b3   Christoph Hellwig   block: remove the...
1825
1826
  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
  		unsigned int nr_segs)
320ae51fe   Jens Axboe   blk-mq: new multi...
1827
  {
93f221ae0   Eric Biggers   block: make blk_c...
1828
  	int err;
f924cddeb   Christoph Hellwig   block: remove blk...
1829
1830
1831
1832
1833
  	if (bio->bi_opf & REQ_RAHEAD)
  		rq->cmd_flags |= REQ_FAILFAST_MASK;
  
  	rq->__sector = bio->bi_iter.bi_sector;
  	rq->write_hint = bio->bi_write_hint;
14ccb66b3   Christoph Hellwig   block: remove the...
1834
  	blk_rq_bio_prep(rq, bio, nr_segs);
93f221ae0   Eric Biggers   block: make blk_c...
1835
1836
1837
1838
  
  	/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
  	err = blk_crypto_rq_bio_prep(rq, bio, GFP_NOIO);
  	WARN_ON_ONCE(err);
4b570521b   Jens Axboe   blk-mq: request i...
1839

b5af37ab3   Konstantin Khlebnikov   block: add a blk_...
1840
  	blk_account_io_start(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
1841
  }
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1842
1843
  static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
  					    struct request *rq,
be94f058f   Jens Axboe   blk-mq: use bd->l...
1844
  					    blk_qc_t *cookie, bool last)
f984df1f0   Shaohua Li   blk-mq: do limite...
1845
  {
f984df1f0   Shaohua Li   blk-mq: do limite...
1846
  	struct request_queue *q = rq->q;
f984df1f0   Shaohua Li   blk-mq: do limite...
1847
1848
  	struct blk_mq_queue_data bd = {
  		.rq = rq,
be94f058f   Jens Axboe   blk-mq: use bd->l...
1849
  		.last = last,
f984df1f0   Shaohua Li   blk-mq: do limite...
1850
  	};
bd166ef18   Jens Axboe   blk-mq-sched: add...
1851
  	blk_qc_t new_cookie;
f06345add   Jens Axboe   blk-mq: fixup typ...
1852
  	blk_status_t ret;
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
  
  	new_cookie = request_to_qc_t(hctx, rq);
  
  	/*
  	 * If queueing was OK, we are done. On error, the caller may kill it.
  	 * For any other error (busy), just add it to our list as we
  	 * previously would have done.
  	 */
  	ret = q->mq_ops->queue_rq(hctx, &bd);
  	switch (ret) {
  	case BLK_STS_OK:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1864
  		blk_mq_update_dispatch_busy(hctx, false);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1865
1866
1867
  		*cookie = new_cookie;
  		break;
  	case BLK_STS_RESOURCE:
86ff7c2a8   Ming Lei   blk-mq: introduce...
1868
  	case BLK_STS_DEV_RESOURCE:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1869
  		blk_mq_update_dispatch_busy(hctx, true);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1870
1871
1872
  		__blk_mq_requeue_request(rq);
  		break;
  	default:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1873
  		blk_mq_update_dispatch_busy(hctx, false);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1874
1875
1876
1877
1878
1879
  		*cookie = BLK_QC_T_NONE;
  		break;
  	}
  
  	return ret;
  }
fd9c40f64   Bart Van Assche   block: Revert v5....
1880
  static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1881
  						struct request *rq,
396eaf21e   Ming Lei   blk-mq: improve D...
1882
  						blk_qc_t *cookie,
fd9c40f64   Bart Van Assche   block: Revert v5....
1883
  						bool bypass_insert, bool last)
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1884
1885
  {
  	struct request_queue *q = rq->q;
d964f04a8   Ming Lei   blk-mq: fix direc...
1886
  	bool run_queue = true;
23d4ee19e   Ming Lei   blk-mq: don't dis...
1887
  	/*
fd9c40f64   Bart Van Assche   block: Revert v5....
1888
  	 * RCU or SRCU read lock is needed before checking quiesced flag.
23d4ee19e   Ming Lei   blk-mq: don't dis...
1889
  	 *
fd9c40f64   Bart Van Assche   block: Revert v5....
1890
1891
1892
  	 * When the queue is stopped or quiesced, ignore 'bypass_insert' from
  	 * blk_mq_request_issue_directly(), and return BLK_STS_OK to the caller
  	 * so that the driver doesn't try to dispatch again.
23d4ee19e   Ming Lei   blk-mq: don't dis...
1893
  	 */
fd9c40f64   Bart Van Assche   block: Revert v5....
1894
  	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
d964f04a8   Ming Lei   blk-mq: fix direc...
1895
  		run_queue = false;
fd9c40f64   Bart Van Assche   block: Revert v5....
1896
1897
  		bypass_insert = false;
  		goto insert;
d964f04a8   Ming Lei   blk-mq: fix direc...
1898
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1899

fd9c40f64   Bart Van Assche   block: Revert v5....
1900
1901
  	if (q->elevator && !bypass_insert)
  		goto insert;
2253efc85   Bart Van Assche   blk-mq: Move more...
1902

65c763694   Ming Lei   blk-mq: pass requ...
1903
  	if (!blk_mq_get_dispatch_budget(q))
fd9c40f64   Bart Van Assche   block: Revert v5....
1904
  		goto insert;
bd166ef18   Jens Axboe   blk-mq-sched: add...
1905

8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1906
  	if (!blk_mq_get_driver_tag(rq)) {
65c763694   Ming Lei   blk-mq: pass requ...
1907
  		blk_mq_put_dispatch_budget(q);
fd9c40f64   Bart Van Assche   block: Revert v5....
1908
  		goto insert;
88022d720   Ming Lei   blk-mq: don't han...
1909
  	}
de1482974   Ming Lei   blk-mq: introduce...
1910

fd9c40f64   Bart Van Assche   block: Revert v5....
1911
1912
1913
1914
  	return __blk_mq_issue_directly(hctx, rq, cookie, last);
  insert:
  	if (bypass_insert)
  		return BLK_STS_RESOURCE;
db03f88fa   Ming Lei   blk-mq: insert re...
1915
  	blk_mq_sched_insert_request(rq, false, run_queue, false);
fd9c40f64   Bart Van Assche   block: Revert v5....
1916
1917
  	return BLK_STS_OK;
  }
105663f73   André Almeida   blk-mq: Document ...
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
  /**
   * blk_mq_try_issue_directly - Try to send a request directly to device driver.
   * @hctx: Pointer of the associated hardware queue.
   * @rq: Pointer to request to be sent.
   * @cookie: Request queue cookie.
   *
   * If the device has enough resources to accept a new request now, send the
   * request directly to the device driver. Else, insert at hctx->dispatch queue,
   * so we can try to send it another time in the future. Requests inserted at this
   * queue have higher priority.
   */
fd9c40f64   Bart Van Assche   block: Revert v5....
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
  static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
  		struct request *rq, blk_qc_t *cookie)
  {
  	blk_status_t ret;
  	int srcu_idx;
  
  	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
  
  	hctx_lock(hctx, &srcu_idx);
  
  	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
  	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
01e99aeca   Ming Lei   blk-mq: insert pa...
1941
  		blk_mq_request_bypass_insert(rq, false, true);
fd9c40f64   Bart Van Assche   block: Revert v5....
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
  	else if (ret != BLK_STS_OK)
  		blk_mq_end_request(rq, ret);
  
  	hctx_unlock(hctx, srcu_idx);
  }
  
  blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
  {
  	blk_status_t ret;
  	int srcu_idx;
  	blk_qc_t unused_cookie;
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  
  	hctx_lock(hctx, &srcu_idx);
  	ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
04ced159c   Jens Axboe   blk-mq: move hctx...
1957
  	hctx_unlock(hctx, srcu_idx);
7f556a44e   Jianchao Wang   blk-mq: refactor ...
1958
1959
  
  	return ret;
5eb6126e1   Christoph Hellwig   blk-mq: improve b...
1960
  }
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1961
1962
1963
  void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
  		struct list_head *list)
  {
536167d47   Keith Busch   blk-mq: don't com...
1964
  	int queued = 0;
632bfb632   yangerkun   blk-mq: call comm...
1965
  	int errors = 0;
536167d47   Keith Busch   blk-mq: don't com...
1966

6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1967
  	while (!list_empty(list)) {
fd9c40f64   Bart Van Assche   block: Revert v5....
1968
  		blk_status_t ret;
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1969
1970
1971
1972
  		struct request *rq = list_first_entry(list, struct request,
  				queuelist);
  
  		list_del_init(&rq->queuelist);
fd9c40f64   Bart Van Assche   block: Revert v5....
1973
1974
1975
1976
  		ret = blk_mq_request_issue_directly(rq, list_empty(list));
  		if (ret != BLK_STS_OK) {
  			if (ret == BLK_STS_RESOURCE ||
  					ret == BLK_STS_DEV_RESOURCE) {
01e99aeca   Ming Lei   blk-mq: insert pa...
1977
  				blk_mq_request_bypass_insert(rq, false,
c616cbee9   Jens Axboe   blk-mq: punt fail...
1978
  							list_empty(list));
fd9c40f64   Bart Van Assche   block: Revert v5....
1979
1980
1981
  				break;
  			}
  			blk_mq_end_request(rq, ret);
632bfb632   yangerkun   blk-mq: call comm...
1982
  			errors++;
536167d47   Keith Busch   blk-mq: don't com...
1983
1984
  		} else
  			queued++;
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1985
  	}
d666ba98f   Jens Axboe   blk-mq: add mq_op...
1986
1987
1988
1989
1990
1991
  
  	/*
  	 * If we didn't flush the entire list, we could have told
  	 * the driver there was more coming, but that turned out to
  	 * be a lie.
  	 */
632bfb632   yangerkun   blk-mq: call comm...
1992
1993
  	if ((!list_empty(list) || errors) &&
  	     hctx->queue->mq_ops->commit_rqs && queued)
d666ba98f   Jens Axboe   blk-mq: add mq_op...
1994
  		hctx->queue->mq_ops->commit_rqs(hctx);
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1995
  }
ce5b009cf   Jens Axboe   block: improve lo...
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
  static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
  {
  	list_add_tail(&rq->queuelist, &plug->mq_list);
  	plug->rq_count++;
  	if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
  		struct request *tmp;
  
  		tmp = list_first_entry(&plug->mq_list, struct request,
  						queuelist);
  		if (tmp->q != rq->q)
  			plug->multiple_queues = true;
  	}
  }
105663f73   André Almeida   blk-mq: Document ...
2009
  /**
c62b37d96   Christoph Hellwig   block: move ->mak...
2010
   * blk_mq_submit_bio - Create and send a request to block device.
105663f73   André Almeida   blk-mq: Document ...
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
   * @bio: Bio pointer.
   *
   * Builds up a request structure from @q and @bio and sends it to the device. The
   * request may not be queued directly to hardware if:
   * * This request can be merged with another one
   * * We want to place request at plug queue for possible future merging
   * * There is an IO scheduler active at this queue
   *
   * It will not queue the request if there is an error with the bio, or at the
   * request creation.
   *
   * Returns: Request queue cookie.
   */
c62b37d96   Christoph Hellwig   block: move ->mak...
2024
  blk_qc_t blk_mq_submit_bio(struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
2025
  {
c62b37d96   Christoph Hellwig   block: move ->mak...
2026
  	struct request_queue *q = bio->bi_disk->queue;
ef295ecf0   Christoph Hellwig   block: better op ...
2027
  	const int is_sync = op_is_sync(bio->bi_opf);
f73f44eb0   Christoph Hellwig   block: add a op_i...
2028
  	const int is_flush_fua = op_is_flush(bio->bi_opf);
e6e7abffe   Christoph Hellwig   blk-mq: simplify ...
2029
2030
2031
  	struct blk_mq_alloc_data data = {
  		.q		= q,
  	};
07068d5b8   Jens Axboe   blk-mq: split mak...
2032
  	struct request *rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
2033
  	struct blk_plug *plug;
5b3f341f0   Shaohua Li   blk-mq: make plug...
2034
  	struct request *same_queue_rq = NULL;
14ccb66b3   Christoph Hellwig   block: remove the...
2035
  	unsigned int nr_segs;
7b371636f   Jens Axboe   blk-mq: return ta...
2036
  	blk_qc_t cookie;
a892c8d52   Satya Tangirala   block: Inline enc...
2037
  	blk_status_t ret;
07068d5b8   Jens Axboe   blk-mq: split mak...
2038
2039
  
  	blk_queue_bounce(q, &bio);
f695ca388   Christoph Hellwig   block: remove the...
2040
  	__blk_queue_split(&bio, &nr_segs);
f36ea50ca   Wen Xiong   blk-mq: NVMe 512B...
2041

e23947bd7   Dmitry Monakhov   bio-integrity: fo...
2042
  	if (!bio_integrity_prep(bio))
ac7c5675f   Christoph Hellwig   blk-mq: allow blk...
2043
  		goto queue_exit;
07068d5b8   Jens Axboe   blk-mq: split mak...
2044

87c279e61   Omar Sandoval   blk-mq: really fi...
2045
  	if (!is_flush_fua && !blk_queue_nomerges(q) &&
14ccb66b3   Christoph Hellwig   block: remove the...
2046
  	    blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
ac7c5675f   Christoph Hellwig   blk-mq: allow blk...
2047
  		goto queue_exit;
f984df1f0   Shaohua Li   blk-mq: do limite...
2048

14ccb66b3   Christoph Hellwig   block: remove the...
2049
  	if (blk_mq_sched_bio_merge(q, bio, nr_segs))
ac7c5675f   Christoph Hellwig   blk-mq: allow blk...
2050
  		goto queue_exit;
bd166ef18   Jens Axboe   blk-mq-sched: add...
2051

d53375608   Christoph Hellwig   block: remove the...
2052
  	rq_qos_throttle(q, bio);
87760e5ee   Jens Axboe   block: hook up wr...
2053

7809167da   Ming Lei   block: don't lose...
2054
  	data.cmd_flags = bio->bi_opf;
e6e7abffe   Christoph Hellwig   blk-mq: simplify ...
2055
  	rq = __blk_mq_alloc_request(&data);
87760e5ee   Jens Axboe   block: hook up wr...
2056
  	if (unlikely(!rq)) {
c1c80384c   Josef Bacik   block: remove ext...
2057
  		rq_qos_cleanup(q, bio);
7b6620d7d   Jens Axboe   block: remove REQ...
2058
  		if (bio->bi_opf & REQ_NOWAIT)
03a07c92a   Goldwyn Rodrigues   block: return on ...
2059
  			bio_wouldblock_error(bio);
ac7c5675f   Christoph Hellwig   blk-mq: allow blk...
2060
  		goto queue_exit;
87760e5ee   Jens Axboe   block: hook up wr...
2061
  	}
d6f1dda27   Xiaoguang Wang   blk-mq: place tra...
2062
  	trace_block_getrq(q, bio, bio->bi_opf);
c1c80384c   Josef Bacik   block: remove ext...
2063
  	rq_qos_track(q, rq, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
2064

fd2d33267   Jens Axboe   blk-mq: add suppo...
2065
  	cookie = request_to_qc_t(data.hctx, rq);
07068d5b8   Jens Axboe   blk-mq: split mak...
2066

970d168de   Bart Van Assche   blk-mq: simplify ...
2067
  	blk_mq_bio_to_request(rq, bio, nr_segs);
a892c8d52   Satya Tangirala   block: Inline enc...
2068
2069
2070
2071
2072
2073
2074
  	ret = blk_crypto_init_request(rq);
  	if (ret != BLK_STS_OK) {
  		bio->bi_status = ret;
  		bio_endio(bio);
  		blk_mq_free_request(rq);
  		return BLK_QC_T_NONE;
  	}
b49773e7b   Damien Le Moal   block: Disable wr...
2075
  	plug = blk_mq_plug(q, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
2076
  	if (unlikely(is_flush_fua)) {
105663f73   André Almeida   blk-mq: Document ...
2077
  		/* Bypass scheduler for flush requests */
923218f61   Ming Lei   blk-mq: don't all...
2078
2079
  		blk_insert_flush(rq);
  		blk_mq_run_hw_queue(data.hctx, true);
3154df262   Ming Lei   blk-mq: apply nor...
2080
2081
  	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
  				!blk_queue_nonrot(q))) {
b2c5d16b7   Jens Axboe   blk-mq: use plug ...
2082
2083
2084
  		/*
  		 * Use plugging if we have a ->commit_rqs() hook as well, as
  		 * we know the driver uses bd->last in a smart fashion.
3154df262   Ming Lei   blk-mq: apply nor...
2085
2086
2087
  		 *
  		 * Use normal plugging if this disk is a slow HDD, as sequential
  		 * IO may benefit a lot from plug merging.
b2c5d16b7   Jens Axboe   blk-mq: use plug ...
2088
  		 */
5f0ed774e   Jens Axboe   block: sum reques...
2089
  		unsigned int request_count = plug->rq_count;
600271d90   Shaohua Li   blk-mq: immediate...
2090
  		struct request *last = NULL;
676d06077   Ming Lei   blk-mq: fix for t...
2091
  		if (!request_count)
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
2092
  			trace_block_plug(q);
600271d90   Shaohua Li   blk-mq: immediate...
2093
2094
  		else
  			last = list_entry_rq(plug->mq_list.prev);
b094f89ca   Jens Axboe   blk-mq: fix calli...
2095

600271d90   Shaohua Li   blk-mq: immediate...
2096
2097
  		if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
  		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
2098
2099
  			blk_flush_plug_list(plug, false);
  			trace_block_plug(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
2100
  		}
b094f89ca   Jens Axboe   blk-mq: fix calli...
2101

ce5b009cf   Jens Axboe   block: improve lo...
2102
  		blk_add_rq_to_plug(plug, rq);
a12de1d42   Ming Lei   blk-mq: honor IO ...
2103
  	} else if (q->elevator) {
105663f73   André Almeida   blk-mq: Document ...
2104
  		/* Insert the request at the IO scheduler queue */
a12de1d42   Ming Lei   blk-mq: honor IO ...
2105
  		blk_mq_sched_insert_request(rq, false, true, true);
2299722c4   Christoph Hellwig   blk-mq: split the...
2106
  	} else if (plug && !blk_queue_nomerges(q)) {
07068d5b8   Jens Axboe   blk-mq: split mak...
2107
  		/*
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
2108
  		 * We do limited plugging. If the bio can be merged, do that.
f984df1f0   Shaohua Li   blk-mq: do limite...
2109
2110
  		 * Otherwise the existing request in the plug list will be
  		 * issued. So the plug list will have one request at most.
2299722c4   Christoph Hellwig   blk-mq: split the...
2111
2112
  		 * The plug list might get flushed before this. If that happens,
  		 * the plug list is empty, and same_queue_rq is invalid.
07068d5b8   Jens Axboe   blk-mq: split mak...
2113
  		 */
2299722c4   Christoph Hellwig   blk-mq: split the...
2114
2115
  		if (list_empty(&plug->mq_list))
  			same_queue_rq = NULL;
4711b5731   Jens Axboe   blk-mq: fix failu...
2116
  		if (same_queue_rq) {
2299722c4   Christoph Hellwig   blk-mq: split the...
2117
  			list_del_init(&same_queue_rq->queuelist);
4711b5731   Jens Axboe   blk-mq: fix failu...
2118
2119
  			plug->rq_count--;
  		}
ce5b009cf   Jens Axboe   block: improve lo...
2120
  		blk_add_rq_to_plug(plug, rq);
ff3b74b8e   Yufen Yu   blk-mq: add trace...
2121
  		trace_block_plug(q);
2299722c4   Christoph Hellwig   blk-mq: split the...
2122

dad7a3be4   Ming Lei   blk-mq: pass corr...
2123
  		if (same_queue_rq) {
ea4f995ee   Jens Axboe   blk-mq: cache req...
2124
  			data.hctx = same_queue_rq->mq_hctx;
ff3b74b8e   Yufen Yu   blk-mq: add trace...
2125
  			trace_block_unplug(q, 1, true);
2299722c4   Christoph Hellwig   blk-mq: split the...
2126
  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
fd9c40f64   Bart Van Assche   block: Revert v5....
2127
  					&cookie);
dad7a3be4   Ming Lei   blk-mq: pass corr...
2128
  		}
a12de1d42   Ming Lei   blk-mq: honor IO ...
2129
2130
  	} else if ((q->nr_hw_queues > 1 && is_sync) ||
  			!data.hctx->dispatch_busy) {
105663f73   André Almeida   blk-mq: Document ...
2131
2132
2133
2134
  		/*
  		 * There is no scheduler and we can try to send directly
  		 * to the hardware.
  		 */
fd9c40f64   Bart Van Assche   block: Revert v5....
2135
  		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
ab42f35d9   Ming Lei   blk-mq: merge bio...
2136
  	} else {
105663f73   André Almeida   blk-mq: Document ...
2137
  		/* Default case. */
8fa9f5564   huhai   blk-mq: remove re...
2138
  		blk_mq_sched_insert_request(rq, false, true, true);
ab42f35d9   Ming Lei   blk-mq: merge bio...
2139
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2140

7b371636f   Jens Axboe   blk-mq: return ta...
2141
  	return cookie;
ac7c5675f   Christoph Hellwig   blk-mq: allow blk...
2142
2143
2144
  queue_exit:
  	blk_queue_exit(q);
  	return BLK_QC_T_NONE;
320ae51fe   Jens Axboe   blk-mq: new multi...
2145
  }
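  /*
   * Hedged caller-side sketch (not from this file): upper layers do not call
   * blk_mq_submit_bio() directly; they build a bio and hand it to
   * submit_bio(), which ends up here for blk-mq based queues and then takes
   * one of the paths documented above (flush bypass, plugging, scheduler
   * insert, or direct issue).  The example_* names and constants are
   * assumptions for illustration only.
   */
  static void example_end_io(struct bio *bio)
  {
  	bio_put(bio);
  }
  
  static void example_submit_read(struct block_device *bdev, struct page *page)
  {
  	struct bio *bio = bio_alloc(GFP_NOIO, 1);
  
  	bio_set_dev(bio, bdev);
  	bio->bi_opf = REQ_OP_READ;
  	bio->bi_iter.bi_sector = 0;
  	bio_add_page(bio, page, PAGE_SIZE, 0);
  	bio->bi_end_io = example_end_io;
  	submit_bio(bio);
  }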
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2146
2147
  void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  		     unsigned int hctx_idx)
95363efde   Jens Axboe   blk-mq: allow blk...
2148
  {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2149
  	struct page *page;
320ae51fe   Jens Axboe   blk-mq: new multi...
2150

24d2f9030   Christoph Hellwig   blk-mq: split out...
2151
  	if (tags->rqs && set->ops->exit_request) {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2152
  		int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
2153

24d2f9030   Christoph Hellwig   blk-mq: split out...
2154
  		for (i = 0; i < tags->nr_tags; i++) {
2af8cbe30   Jens Axboe   blk-mq: split tag...
2155
2156
2157
  			struct request *rq = tags->static_rqs[i];
  
  			if (!rq)
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2158
  				continue;
d6296d39e   Christoph Hellwig   blk-mq: update ->...
2159
  			set->ops->exit_request(set, rq, hctx_idx);
2af8cbe30   Jens Axboe   blk-mq: split tag...
2160
  			tags->static_rqs[i] = NULL;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2161
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
2162
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2163

24d2f9030   Christoph Hellwig   blk-mq: split out...
2164
2165
  	while (!list_empty(&tags->page_list)) {
  		page = list_first_entry(&tags->page_list, struct page, lru);
6753471c0   Dave Hansen   blk-mq: uses page...
2166
  		list_del_init(&page->lru);
f75782e4e   Catalin Marinas   block: kmemleak: ...
2167
2168
  		/*
  		 * Remove kmemleak object previously allocated in
273938bf7   Raul E Rangel   block: fix functi...
2169
  		 * blk_mq_alloc_rqs().
f75782e4e   Catalin Marinas   block: kmemleak: ...
2170
2171
  		 */
  		kmemleak_free(page_address(page));
320ae51fe   Jens Axboe   blk-mq: new multi...
2172
2173
  		__free_pages(page, page->private);
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2174
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2175

1c0706a70   John Garry   blk-mq: Pass flag...
2176
  void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2177
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
2178
  	kfree(tags->rqs);
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2179
  	tags->rqs = NULL;
2af8cbe30   Jens Axboe   blk-mq: split tag...
2180
2181
  	kfree(tags->static_rqs);
  	tags->static_rqs = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2182

1c0706a70   John Garry   blk-mq: Pass flag...
2183
  	blk_mq_free_tags(tags, flags);
320ae51fe   Jens Axboe   blk-mq: new multi...
2184
  }
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2185
2186
2187
  struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
  					unsigned int hctx_idx,
  					unsigned int nr_tags,
1c0706a70   John Garry   blk-mq: Pass flag...
2188
2189
  					unsigned int reserved_tags,
  					unsigned int flags)
320ae51fe   Jens Axboe   blk-mq: new multi...
2190
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
2191
  	struct blk_mq_tags *tags;
59f082e46   Shaohua Li   blk-mq: allocate ...
2192
  	int node;
320ae51fe   Jens Axboe   blk-mq: new multi...
2193

7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2194
  	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
59f082e46   Shaohua Li   blk-mq: allocate ...
2195
2196
  	if (node == NUMA_NO_NODE)
  		node = set->numa_node;
1c0706a70   John Garry   blk-mq: Pass flag...
2197
  	tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2198
2199
  	if (!tags)
  		return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2200

590b5b7d8   Kees Cook   treewide: kzalloc...
2201
  	tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2202
  				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
59f082e46   Shaohua Li   blk-mq: allocate ...
2203
  				 node);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2204
  	if (!tags->rqs) {
1c0706a70   John Garry   blk-mq: Pass flag...
2205
  		blk_mq_free_tags(tags, flags);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2206
2207
  		return NULL;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2208

590b5b7d8   Kees Cook   treewide: kzalloc...
2209
2210
2211
  	tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *),
  					GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
  					node);
2af8cbe30   Jens Axboe   blk-mq: split tag...
2212
2213
  	if (!tags->static_rqs) {
  		kfree(tags->rqs);
1c0706a70   John Garry   blk-mq: Pass flag...
2214
  		blk_mq_free_tags(tags, flags);
2af8cbe30   Jens Axboe   blk-mq: split tag...
2215
2216
  		return NULL;
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2217
2218
2219
2220
2221
2222
2223
  	return tags;
  }
  
  static size_t order_to_size(unsigned int order)
  {
  	return (size_t)PAGE_SIZE << order;
  }
1d9bd5161   Tejun Heo   blk-mq: replace t...
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
  static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
  			       unsigned int hctx_idx, int node)
  {
  	int ret;
  
  	if (set->ops->init_request) {
  		ret = set->ops->init_request(set, rq, hctx_idx, node);
  		if (ret)
  			return ret;
  	}
12f5b9314   Keith Busch   blk-mq: Remove ge...
2234
  	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
1d9bd5161   Tejun Heo   blk-mq: replace t...
2235
2236
  	return 0;
  }
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2237
2238
2239
2240
2241
  int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  		     unsigned int hctx_idx, unsigned int depth)
  {
  	unsigned int i, j, entries_per_page, max_order = 4;
  	size_t rq_size, left;
59f082e46   Shaohua Li   blk-mq: allocate ...
2242
  	int node;
7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2243
  	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
59f082e46   Shaohua Li   blk-mq: allocate ...
2244
2245
  	if (node == NUMA_NO_NODE)
  		node = set->numa_node;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2246
2247
  
  	INIT_LIST_HEAD(&tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2248
2249
2250
2251
  	/*
  	 * rq_size is the size of the request plus driver payload, rounded
  	 * to the cacheline size
  	 */
24d2f9030   Christoph Hellwig   blk-mq: split out...
2252
  	rq_size = round_up(sizeof(struct request) + set->cmd_size,
320ae51fe   Jens Axboe   blk-mq: new multi...
2253
  				cache_line_size());
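  	/*
  	 * Worked example with assumed numbers: on a machine with 64-byte
  	 * cache lines and set->cmd_size == 72, rq_size becomes
  	 * round_up(sizeof(struct request) + 72, 64), i.e. each request plus
  	 * its driver PDU is padded to the next 64-byte boundary, so
  	 * consecutive requests within a page each start on their own
  	 * cache line.
  	 */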
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2254
  	left = rq_size * depth;
320ae51fe   Jens Axboe   blk-mq: new multi...
2255

cc71a6f43   Jens Axboe   blk-mq: abstract ...
2256
  	for (i = 0; i < depth; ) {
320ae51fe   Jens Axboe   blk-mq: new multi...
2257
2258
2259
2260
  		int this_order = max_order;
  		struct page *page;
  		int to_do;
  		void *p;
b3a834b15   Bartlomiej Zolnierkiewicz   blk-mq: fix undef...
2261
  		while (this_order && left < order_to_size(this_order - 1))
320ae51fe   Jens Axboe   blk-mq: new multi...
2262
2263
2264
  			this_order--;
  
  		do {
59f082e46   Shaohua Li   blk-mq: allocate ...
2265
  			page = alloc_pages_node(node,
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2266
  				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
a51644054   Jens Axboe   blk-mq: scale dep...
2267
  				this_order);
320ae51fe   Jens Axboe   blk-mq: new multi...
2268
2269
2270
2271
2272
2273
2274
2275
2276
  			if (page)
  				break;
  			if (!this_order--)
  				break;
  			if (order_to_size(this_order) < rq_size)
  				break;
  		} while (1);
  
  		if (!page)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2277
  			goto fail;
320ae51fe   Jens Axboe   blk-mq: new multi...
2278
2279
  
  		page->private = this_order;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2280
  		list_add_tail(&page->lru, &tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2281
2282
  
  		p = page_address(page);
f75782e4e   Catalin Marinas   block: kmemleak: ...
2283
2284
2285
2286
  		/*
  		 * Allow kmemleak to scan these pages as they contain pointers
  		 * to additional allocations like via ops->init_request().
  		 */
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2287
  		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
320ae51fe   Jens Axboe   blk-mq: new multi...
2288
  		entries_per_page = order_to_size(this_order) / rq_size;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2289
  		to_do = min(entries_per_page, depth - i);
320ae51fe   Jens Axboe   blk-mq: new multi...
2290
2291
  		left -= to_do * rq_size;
  		for (j = 0; j < to_do; j++) {
2af8cbe30   Jens Axboe   blk-mq: split tag...
2292
2293
2294
  			struct request *rq = p;
  
  			tags->static_rqs[i] = rq;
1d9bd5161   Tejun Heo   blk-mq: replace t...
2295
2296
2297
  			if (blk_mq_init_request(set, rq, hctx_idx, node)) {
  				tags->static_rqs[i] = NULL;
  				goto fail;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2298
  			}
320ae51fe   Jens Axboe   blk-mq: new multi...
2299
2300
2301
2302
  			p += rq_size;
  			i++;
  		}
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2303
  	return 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
2304

24d2f9030   Christoph Hellwig   blk-mq: split out...
2305
  fail:
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2306
2307
  	blk_mq_free_rqs(set, tags, hctx_idx);
  	return -ENOMEM;
320ae51fe   Jens Axboe   blk-mq: new multi...
2308
  }
bf0beec06   Ming Lei   blk-mq: drain I/O...
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
  struct rq_iter_data {
  	struct blk_mq_hw_ctx *hctx;
  	bool has_rq;
  };
  
  static bool blk_mq_has_request(struct request *rq, void *data, bool reserved)
  {
  	struct rq_iter_data *iter_data = data;
  
  	if (rq->mq_hctx != iter_data->hctx)
  		return true;
  	iter_data->has_rq = true;
  	return false;
  }
  
  static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
  {
  	struct blk_mq_tags *tags = hctx->sched_tags ?
  			hctx->sched_tags : hctx->tags;
  	struct rq_iter_data data = {
  		.hctx	= hctx,
  	};
  
  	blk_mq_all_tag_iter(tags, blk_mq_has_request, &data);
  	return data.has_rq;
  }
  
  static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
  		struct blk_mq_hw_ctx *hctx)
  {
  	if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu)
  		return false;
  	if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
  		return false;
  	return true;
  }
  
  static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
  {
  	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
  			struct blk_mq_hw_ctx, cpuhp_online);
  
  	if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
  	    !blk_mq_last_cpu_in_hctx(cpu, hctx))
  		return 0;
  
  	/*
  	 * Prevent new requests from being allocated on the current hctx.
  	 *
  	 * The smp_mb__after_atomic() pairs with the implied barrier in
  	 * test_and_set_bit_lock() in sbitmap_get(), ensuring the inactive flag
  	 * is seen once we return from the tag allocator.
  	 */
  	set_bit(BLK_MQ_S_INACTIVE, &hctx->state);
  	smp_mb__after_atomic();
  
  	/*
  	 * Try to grab a reference to the queue and wait for any outstanding
  	 * requests.  If we could not grab a reference the queue has been
  	 * frozen and there are no requests.
  	 */
  	if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
  		while (blk_mq_hctx_has_requests(hctx))
  			msleep(5);
  		percpu_ref_put(&hctx->queue->q_usage_counter);
  	}
  
  	return 0;
  }
  
  static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
  {
  	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
  			struct blk_mq_hw_ctx, cpuhp_online);
  
  	if (cpumask_test_cpu(cpu, hctx->cpumask))
  		clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
  	return 0;
  }
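  /*
   * Context sketch, hedged: the two callbacks above are per-hctx hotplug
   * handlers.  They are registered once at init time elsewhere in blk-mq
   * (outside this excerpt) roughly as below; the state name string is an
   * assumption for illustration, not a quote of that code.
   */
  static int __init example_blk_mq_cpuhp_setup(void)
  {
  	return cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
  				       blk_mq_hctx_notify_online,
  				       blk_mq_hctx_notify_offline);
  }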
e57690fe0   Jens Axboe   blk-mq: don't ove...
2388
2389
2390
2391
2392
  /*
   * 'cpu' is going away. Splice any existing rq_list entries from this
   * software queue to the hw queue dispatch list, and ensure that it
   * gets run.
   */
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2393
  static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
484b4061e   Jens Axboe   blk-mq: save memo...
2394
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2395
  	struct blk_mq_hw_ctx *hctx;
484b4061e   Jens Axboe   blk-mq: save memo...
2396
2397
  	struct blk_mq_ctx *ctx;
  	LIST_HEAD(tmp);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2398
  	enum hctx_type type;
484b4061e   Jens Axboe   blk-mq: save memo...
2399

9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2400
  	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
bf0beec06   Ming Lei   blk-mq: drain I/O...
2401
2402
  	if (!cpumask_test_cpu(cpu, hctx->cpumask))
  		return 0;
e57690fe0   Jens Axboe   blk-mq: don't ove...
2403
  	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2404
  	type = hctx->type;
484b4061e   Jens Axboe   blk-mq: save memo...
2405
2406
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2407
2408
  	if (!list_empty(&ctx->rq_lists[type])) {
  		list_splice_init(&ctx->rq_lists[type], &tmp);
484b4061e   Jens Axboe   blk-mq: save memo...
2409
2410
2411
2412
2413
  		blk_mq_hctx_clear_pending(hctx, ctx);
  	}
  	spin_unlock(&ctx->lock);
  
  	if (list_empty(&tmp))
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2414
  		return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
2415

e57690fe0   Jens Axboe   blk-mq: don't ove...
2416
2417
2418
  	spin_lock(&hctx->lock);
  	list_splice_tail_init(&tmp, &hctx->dispatch);
  	spin_unlock(&hctx->lock);
484b4061e   Jens Axboe   blk-mq: save memo...
2419
2420
  
  	blk_mq_run_hw_queue(hctx, true);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2421
  	return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
2422
  }
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2423
  static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
484b4061e   Jens Axboe   blk-mq: save memo...
2424
  {
bf0beec06   Ming Lei   blk-mq: drain I/O...
2425
2426
2427
  	if (!(hctx->flags & BLK_MQ_F_STACKING))
  		cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
  						    &hctx->cpuhp_online);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2428
2429
  	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
  					    &hctx->cpuhp_dead);
484b4061e   Jens Axboe   blk-mq: save memo...
2430
  }
c3b4afca7   Ming Lei   blk-mq: free hctx...
2431
  /* hctx->ctxs will be freed in queue's release handler */
08e98fc60   Ming Lei   blk-mq: handle fa...
2432
2433
2434
2435
  static void blk_mq_exit_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
8ab0b7dc7   Ming Lei   blk-mq: fix kerne...
2436
2437
  	if (blk_mq_hw_queue_mapped(hctx))
  		blk_mq_tag_idle(hctx);
08e98fc60   Ming Lei   blk-mq: handle fa...
2438

f70ced091   Ming Lei   blk-mq: support p...
2439
  	if (set->ops->exit_request)
d6296d39e   Christoph Hellwig   blk-mq: update ->...
2440
  		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
f70ced091   Ming Lei   blk-mq: support p...
2441

08e98fc60   Ming Lei   blk-mq: handle fa...
2442
2443
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2444
  	blk_mq_remove_cpuhp(hctx);
2f8f1336a   Ming Lei   blk-mq: always fr...
2445
2446
2447
2448
  
  	spin_lock(&q->unused_hctx_lock);
  	list_add(&hctx->hctx_list, &q->unused_hctx_list);
  	spin_unlock(&q->unused_hctx_lock);
08e98fc60   Ming Lei   blk-mq: handle fa...
2449
  }
624dbe475   Ming Lei   blk-mq: avoid cod...
2450
2451
2452
2453
2454
2455
2456
2457
2458
  static void blk_mq_exit_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set, int nr_queue)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (i == nr_queue)
  			break;
477e19ded   Jianchao Wang   blk-mq: adjust de...
2459
  		blk_mq_debugfs_unregister_hctx(hctx);
08e98fc60   Ming Lei   blk-mq: handle fa...
2460
  		blk_mq_exit_hctx(q, set, hctx, i);
624dbe475   Ming Lei   blk-mq: avoid cod...
2461
  	}
624dbe475   Ming Lei   blk-mq: avoid cod...
2462
  }
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
  static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
  {
  	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
  
  	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
  			   __alignof__(struct blk_mq_hw_ctx)) !=
  		     sizeof(struct blk_mq_hw_ctx));
  
  	if (tag_set->flags & BLK_MQ_F_BLOCKING)
  		hw_ctx_size += sizeof(struct srcu_struct);
  
  	return hw_ctx_size;
  }
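  /*
   * Illustrative note (sizes are configuration-dependent and assumed here):
   * for a BLK_MQ_F_BLOCKING tag set the allocation is
   * sizeof(struct blk_mq_hw_ctx) plus sizeof(struct srcu_struct), so the
   * flexible hctx->srcu[] tail used by hctx_lock()/hctx_unlock() is backed
   * by real storage; non-blocking tag sets skip that tail entirely.
   */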
08e98fc60   Ming Lei   blk-mq: handle fa...
2476
2477
2478
  static int blk_mq_init_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
2479
  {
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2480
  	hctx->queue_num = hctx_idx;
bf0beec06   Ming Lei   blk-mq: drain I/O...
2481
2482
2483
  	if (!(hctx->flags & BLK_MQ_F_STACKING))
  		cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
  				&hctx->cpuhp_online);
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2484
2485
2486
2487
2488
2489
2490
  	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
  
  	hctx->tags = set->tags[hctx_idx];
  
  	if (set->ops->init_hctx &&
  	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
  		goto unregister_cpu_notifier;
08e98fc60   Ming Lei   blk-mq: handle fa...
2491

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
  	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
  				hctx->numa_node))
  		goto exit_hctx;
  	return 0;
  
   exit_hctx:
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
   unregister_cpu_notifier:
  	blk_mq_remove_cpuhp(hctx);
  	return -1;
  }
  
  static struct blk_mq_hw_ctx *
  blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
  		int node)
  {
  	struct blk_mq_hw_ctx *hctx;
  	gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
  
  	hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
  	if (!hctx)
  		goto fail_alloc_hctx;
  
  	if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
  		goto free_hctx;
  
  	atomic_set(&hctx->nr_active, 0);
b445547ec   Kashyap Desai   blk-mq, elevator:...
2520
  	atomic_set(&hctx->elevator_queued, 0);
08e98fc60   Ming Lei   blk-mq: handle fa...
2521
  	if (node == NUMA_NO_NODE)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2522
2523
  		node = set->numa_node;
  	hctx->numa_node = node;
08e98fc60   Ming Lei   blk-mq: handle fa...
2524

9f9937379   Jens Axboe   blk-mq: unify hct...
2525
  	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
08e98fc60   Ming Lei   blk-mq: handle fa...
2526
2527
2528
  	spin_lock_init(&hctx->lock);
  	INIT_LIST_HEAD(&hctx->dispatch);
  	hctx->queue = q;
51db1c37e   Ming Lei   blk-mq: Rename BL...
2529
  	hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
08e98fc60   Ming Lei   blk-mq: handle fa...
2530

2f8f1336a   Ming Lei   blk-mq: always fr...
2531
  	INIT_LIST_HEAD(&hctx->hctx_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2532
  	/*
08e98fc60   Ming Lei   blk-mq: handle fa...
2533
2534
  	 * Allocate space for all possible cpus to avoid allocation at
  	 * runtime
320ae51fe   Jens Axboe   blk-mq: new multi...
2535
  	 */
d904bfa79   Johannes Thumshirn   block/blk-mq.c: u...
2536
  	hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2537
  			gfp, node);
08e98fc60   Ming Lei   blk-mq: handle fa...
2538
  	if (!hctx->ctxs)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2539
  		goto free_cpumask;
320ae51fe   Jens Axboe   blk-mq: new multi...
2540

5b202853f   Jianchao Wang   blk-mq: change gf...
2541
  	if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2542
  				gfp, node))
08e98fc60   Ming Lei   blk-mq: handle fa...
2543
  		goto free_ctxs;
08e98fc60   Ming Lei   blk-mq: handle fa...
2544
  	hctx->nr_ctx = 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
2545

5815839b3   Ming Lei   blk-mq: introduce...
2546
  	spin_lock_init(&hctx->dispatch_wait_lock);
eb619fdb2   Jens Axboe   blk-mq: fix issue...
2547
2548
  	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
  	INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
754a15726   Guoqing Jiang   block: remove unn...
2549
  	hctx->fq = blk_alloc_flush_queue(hctx->numa_node, set->cmd_size, gfp);
f70ced091   Ming Lei   blk-mq: support p...
2550
  	if (!hctx->fq)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2551
  		goto free_bitmap;
320ae51fe   Jens Axboe   blk-mq: new multi...
2552

6a83e74d2   Bart Van Assche   blk-mq: Introduce...
2553
  	if (hctx->flags & BLK_MQ_F_BLOCKING)
05707b64a   Tejun Heo   blk-mq: rename bl...
2554
  		init_srcu_struct(hctx->srcu);
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2555
  	blk_mq_hctx_kobj_init(hctx);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
2556

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2557
  	return hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
2558

08e98fc60   Ming Lei   blk-mq: handle fa...
2559
   free_bitmap:
88459642c   Omar Sandoval   blk-mq: abstract ...
2560
  	sbitmap_free(&hctx->ctx_map);
08e98fc60   Ming Lei   blk-mq: handle fa...
2561
2562
   free_ctxs:
  	kfree(hctx->ctxs);
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2563
2564
2565
2566
2567
2568
   free_cpumask:
  	free_cpumask_var(hctx->cpumask);
   free_hctx:
  	kfree(hctx);
   fail_alloc_hctx:
  	return NULL;
08e98fc60   Ming Lei   blk-mq: handle fa...
2569
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2570

320ae51fe   Jens Axboe   blk-mq: new multi...
2571
2572
2573
  static void blk_mq_init_cpu_queues(struct request_queue *q,
  				   unsigned int nr_hw_queues)
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2574
2575
  	struct blk_mq_tag_set *set = q->tag_set;
  	unsigned int i, j;
320ae51fe   Jens Axboe   blk-mq: new multi...
2576
2577
2578
2579
  
  	for_each_possible_cpu(i) {
  		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
  		struct blk_mq_hw_ctx *hctx;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2580
  		int k;
320ae51fe   Jens Axboe   blk-mq: new multi...
2581

320ae51fe   Jens Axboe   blk-mq: new multi...
2582
2583
  		__ctx->cpu = i;
  		spin_lock_init(&__ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2584
2585
  		for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
  			INIT_LIST_HEAD(&__ctx->rq_lists[k]);
320ae51fe   Jens Axboe   blk-mq: new multi...
2586
  		__ctx->queue = q;
320ae51fe   Jens Axboe   blk-mq: new multi...
2587
2588
2589
2590
  		/*
  		 * Set local node, IFF we have more than one hw queue. If
  		 * not, we remain on the home node of the device
  		 */
b3c661b15   Jens Axboe   blk-mq: support m...
2591
2592
2593
  		for (j = 0; j < set->nr_maps; j++) {
  			hctx = blk_mq_map_queue_type(q, j, i);
  			if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
576e85c5e   Xianting Tian   blk-mq: remove th...
2594
  				hctx->numa_node = cpu_to_node(i);
b3c661b15   Jens Axboe   blk-mq: support m...
2595
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
2596
2597
  	}
  }
03b63b029   Weiping Zhang   block: rename __b...
2598
2599
  static bool __blk_mq_alloc_map_and_request(struct blk_mq_tag_set *set,
  					int hctx_idx)
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2600
  {
1c0706a70   John Garry   blk-mq: Pass flag...
2601
  	unsigned int flags = set->flags;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2602
2603
2604
  	int ret = 0;
  
  	set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
1c0706a70   John Garry   blk-mq: Pass flag...
2605
  					set->queue_depth, set->reserved_tags, flags);
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2606
2607
2608
2609
2610
2611
2612
  	if (!set->tags[hctx_idx])
  		return false;
  
  	ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
  				set->queue_depth);
  	if (!ret)
  		return true;
1c0706a70   John Garry   blk-mq: Pass flag...
2613
  	blk_mq_free_rq_map(set->tags[hctx_idx], flags);
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2614
2615
2616
2617
2618
2619
2620
  	set->tags[hctx_idx] = NULL;
  	return false;
  }
  
  static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
  					 unsigned int hctx_idx)
  {
1c0706a70   John Garry   blk-mq: Pass flag...
2621
  	unsigned int flags = set->flags;
4e6db0f21   Dan Carpenter   blk-mq: Add a NUL...
2622
  	if (set->tags && set->tags[hctx_idx]) {
bd166ef18   Jens Axboe   blk-mq-sched: add...
2623
  		blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
1c0706a70   John Garry   blk-mq: Pass flag...
2624
  		blk_mq_free_rq_map(set->tags[hctx_idx], flags);
bd166ef18   Jens Axboe   blk-mq-sched: add...
2625
2626
  		set->tags[hctx_idx] = NULL;
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2627
  }
4b855ad37   Christoph Hellwig   blk-mq: Create hc...
2628
  static void blk_mq_map_swqueue(struct request_queue *q)
320ae51fe   Jens Axboe   blk-mq: new multi...
2629
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2630
  	unsigned int i, j, hctx_idx;
320ae51fe   Jens Axboe   blk-mq: new multi...
2631
2632
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
2a34c0872   Ming Lei   blk-mq: fix CPU h...
2633
  	struct blk_mq_tag_set *set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
2634
2635
  
  	queue_for_each_hw_ctx(q, hctx, i) {
e4043dcf3   Jens Axboe   blk-mq: ensure th...
2636
  		cpumask_clear(hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
2637
  		hctx->nr_ctx = 0;
d416c92c5   huhai   blk-mq: clear hct...
2638
  		hctx->dispatch_from = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2639
2640
2641
  	}
  
  	/*
4b855ad37   Christoph Hellwig   blk-mq: Create hc...
2642
  	 * Map software to hardware queues.
4412efecf   Ming Lei   Revert "blk-mq: r...
2643
2644
  	 *
  	 * If the cpu isn't present, the cpu is mapped to the first hctx.
320ae51fe   Jens Axboe   blk-mq: new multi...
2645
  	 */
20e4d8139   Christoph Hellwig   blk-mq: simplify ...
2646
  	for_each_possible_cpu(i) {
4412efecf   Ming Lei   Revert "blk-mq: r...
2647

897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
2648
  		ctx = per_cpu_ptr(q->queue_ctx, i);
b3c661b15   Jens Axboe   blk-mq: support m...
2649
  		for (j = 0; j < set->nr_maps; j++) {
bb94aea14   Jianchao Wang   blk-mq: save defa...
2650
2651
2652
  			if (!set->map[j].nr_queues) {
  				ctx->hctxs[j] = blk_mq_map_queue_type(q,
  						HCTX_TYPE_DEFAULT, i);
e5edd5f29   Ming Lei   blk-mq: skip zero...
2653
  				continue;
bb94aea14   Jianchao Wang   blk-mq: save defa...
2654
  			}
fd689871b   Ming Lei   block: alloc map ...
2655
2656
2657
  			hctx_idx = set->map[j].mq_map[i];
  			/* unmapped hw queue can be remapped after CPU topo changed */
  			if (!set->tags[hctx_idx] &&
03b63b029   Weiping Zhang   block: rename __b...
2658
  			    !__blk_mq_alloc_map_and_request(set, hctx_idx)) {
fd689871b   Ming Lei   block: alloc map ...
2659
2660
2661
2662
2663
2664
2665
2666
  				/*
  				 * If tags initialization fails for some hctx,
  				 * that hctx won't be brought online.  In this
  				 * case, remap the current ctx to hctx[0], which
  				 * is guaranteed to always have tags allocated.
  				 */
  				set->map[j].mq_map[i] = 0;
  			}
e5edd5f29   Ming Lei   blk-mq: skip zero...
2667

b3c661b15   Jens Axboe   blk-mq: support m...
2668
  			hctx = blk_mq_map_queue_type(q, j, i);
8ccdf4a37   Jianchao Wang   blk-mq: save queu...
2669
  			ctx->hctxs[j] = hctx;
b3c661b15   Jens Axboe   blk-mq: support m...
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
  			/*
  			 * If the CPU is already set in the mask, then we've
  			 * mapped this one already. This can happen if
  			 * devices share queues across queue maps.
  			 */
  			if (cpumask_test_cpu(i, hctx->cpumask))
  				continue;
  
  			cpumask_set_cpu(i, hctx->cpumask);
  			hctx->type = j;
  			ctx->index_hw[hctx->type] = hctx->nr_ctx;
  			hctx->ctxs[hctx->nr_ctx++] = ctx;
  
  			/*
  			 * If the nr_ctx type overflows, we have exceeded the
  			 * amount of sw queues we can support.
  			 */
  			BUG_ON(!hctx->nr_ctx);
  		}
bb94aea14   Jianchao Wang   blk-mq: save defa...
2689
2690
2691
2692
  
  		for (; j < HCTX_MAX_TYPES; j++)
  			ctx->hctxs[j] = blk_mq_map_queue_type(q,
  					HCTX_TYPE_DEFAULT, i);
320ae51fe   Jens Axboe   blk-mq: new multi...
2693
  	}
506e931f9   Jens Axboe   blk-mq: add basic...
2694
2695
  
  	queue_for_each_hw_ctx(q, hctx, i) {
4412efecf   Ming Lei   Revert "blk-mq: r...
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
  		/*
  		 * If no software queues are mapped to this hardware queue,
  		 * disable it and free the request entries.
  		 */
  		if (!hctx->nr_ctx) {
  			/* Never unmap queue 0.  We need it as a
  			 * fallback in case allocation for a new remap
  			 * fails.
  			 */
  			if (i && set->tags[i])
  				blk_mq_free_map_and_requests(set, i);
  
  			hctx->tags = NULL;
  			continue;
  		}
484b4061e   Jens Axboe   blk-mq: save memo...
2711

2a34c0872   Ming Lei   blk-mq: fix CPU h...
2712
2713
  		hctx->tags = set->tags[i];
  		WARN_ON(!hctx->tags);
484b4061e   Jens Axboe   blk-mq: save memo...
2714
  		/*
889fa31f0   Chong Yuan   blk-mq: reduce un...
2715
2716
2717
2718
  		 * Set the map size to the number of mapped software queues.
  		 * This is more accurate and more efficient than looping
  		 * over all possibly mapped software queues.
  		 */
88459642c   Omar Sandoval   blk-mq: abstract ...
2719
  		sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
889fa31f0   Chong Yuan   blk-mq: reduce un...
2720
2721
  
  		/*
484b4061e   Jens Axboe   blk-mq: save memo...
2722
2723
  		 * Initialize batch roundrobin counts
  		 */
f82ddf192   Ming Lei   blk-mq: introduce...
2724
  		hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
506e931f9   Jens Axboe   blk-mq: add basic...
2725
2726
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2727
  }
8e8320c93   Jens Axboe   blk-mq: fix perfo...
2728
2729
2730
2731
  /*
   * Caller needs to ensure that we're either frozen/quiesced, or that
   * the queue isn't live yet.
   */
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2732
  static void queue_set_hctx_shared(struct request_queue *q, bool shared)
0d2602ca3   Jens Axboe   blk-mq: improve s...
2733
2734
  {
  	struct blk_mq_hw_ctx *hctx;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2735
  	int i;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2736
  	queue_for_each_hw_ctx(q, hctx, i) {
97889f9ac   Ming Lei   blk-mq: remove sy...
2737
  		if (shared)
51db1c37e   Ming Lei   blk-mq: Rename BL...
2738
  			hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
97889f9ac   Ming Lei   blk-mq: remove sy...
2739
  		else
51db1c37e   Ming Lei   blk-mq: Rename BL...
2740
  			hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2741
2742
  	}
  }
655ac3009   Hannes Reinecke   blk-mq: Rename bl...
2743
2744
  static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
  					 bool shared)
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2745
2746
  {
  	struct request_queue *q;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2747

705cda97e   Bart Van Assche   blk-mq: Make it s...
2748
  	lockdep_assert_held(&set->tag_list_lock);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2749
2750
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_freeze_queue(q);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2751
  		queue_set_hctx_shared(q, shared);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2752
2753
2754
2755
2756
2757
2758
  		blk_mq_unfreeze_queue(q);
  	}
  }
  
  static void blk_mq_del_queue_tag_set(struct request_queue *q)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2759
  	mutex_lock(&set->tag_list_lock);
08c875cbf   Daniel Wagner   block: Use non _r...
2760
  	list_del(&q->tag_set_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2761
2762
  	if (list_is_singular(&set->tag_list)) {
  		/* just transitioned to unshared */
51db1c37e   Ming Lei   blk-mq: Rename BL...
2763
  		set->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2764
  		/* update existing queue */
655ac3009   Hannes Reinecke   blk-mq: Rename bl...
2765
  		blk_mq_update_tag_set_shared(set, false);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2766
  	}
0d2602ca3   Jens Axboe   blk-mq: improve s...
2767
  	mutex_unlock(&set->tag_list_lock);
a347c7ad8   Roman Pen   blk-mq: reinit q-...
2768
  	INIT_LIST_HEAD(&q->tag_set_list);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2769
2770
2771
2772
2773
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
  				     struct request_queue *q)
  {
0d2602ca3   Jens Axboe   blk-mq: improve s...
2774
  	mutex_lock(&set->tag_list_lock);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2775

ff821d271   Jens Axboe   blk-mq: fixup som...
2776
2777
2778
2779
  	/*
  	 * Check to see if we're transitioning to shared (from 1 to 2 queues).
  	 */
  	if (!list_empty(&set->tag_list) &&
51db1c37e   Ming Lei   blk-mq: Rename BL...
2780
2781
  	    !(set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
  		set->flags |= BLK_MQ_F_TAG_QUEUE_SHARED;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2782
  		/* update existing queue */
655ac3009   Hannes Reinecke   blk-mq: Rename bl...
2783
  		blk_mq_update_tag_set_shared(set, true);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2784
  	}
51db1c37e   Ming Lei   blk-mq: Rename BL...
2785
  	if (set->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2786
  		queue_set_hctx_shared(q, true);
08c875cbf   Daniel Wagner   block: Use non _r...
2787
  	list_add_tail(&q->tag_set_list, &set->tag_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2788

0d2602ca3   Jens Axboe   blk-mq: improve s...
2789
2790
  	mutex_unlock(&set->tag_list_lock);
  }
1db4909e7   Ming Lei   blk-mq: not embed...
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
  /* All allocations will be freed in release handler of q->mq_kobj */
  static int blk_mq_alloc_ctxs(struct request_queue *q)
  {
  	struct blk_mq_ctxs *ctxs;
  	int cpu;
  
  	ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
  	if (!ctxs)
  		return -ENOMEM;
  
  	ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
  	if (!ctxs->queue_ctx)
  		goto fail;
  
  	for_each_possible_cpu(cpu) {
  		struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
  		ctx->ctxs = ctxs;
  	}
  
  	q->mq_kobj = &ctxs->kobj;
  	q->queue_ctx = ctxs->queue_ctx;
  
  	return 0;
   fail:
  	kfree(ctxs);
  	return -ENOMEM;
  }
e09aae7ed   Ming Lei   blk-mq: release m...
2818
2819
2820
2821
2822
2823
2824
2825
  /*
   * It is the actual release handler for mq, but we do it from the
   * request queue's release handler to avoid use-after-free headaches:
   * q->mq_kobj shouldn't have been introduced, but we can't group the
   * ctx/hctx kobjects without it.
   */
  void blk_mq_release(struct request_queue *q)
  {
2f8f1336a   Ming Lei   blk-mq: always fr...
2826
2827
  	struct blk_mq_hw_ctx *hctx, *next;
  	int i;
e09aae7ed   Ming Lei   blk-mq: release m...
2828

2f8f1336a   Ming Lei   blk-mq: always fr...
2829
2830
2831
2832
2833
2834
  	queue_for_each_hw_ctx(q, hctx, i)
  		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
  
  	/* all hctx are in .unused_hctx_list now */
  	list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
  		list_del_init(&hctx->hctx_list);
6c8b232ef   Ming Lei   blk-mq: make life...
2835
  		kobject_put(&hctx->kobj);
c3b4afca7   Ming Lei   blk-mq: free hctx...
2836
  	}
e09aae7ed   Ming Lei   blk-mq: release m...
2837
2838
  
  	kfree(q->queue_hw_ctx);
7ea5fe31c   Ming Lei   blk-mq: make life...
2839
2840
2841
2842
2843
  	/*
  	 * release .mq_kobj and sw queue's kobject now because
  	 * both share lifetime with request queue.
  	 */
  	blk_mq_sysfs_deinit(q);
e09aae7ed   Ming Lei   blk-mq: release m...
2844
  }
2f227bb99   Christoph Hellwig   block: add a blk_...
2845
2846
  struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
  		void *queuedata)
320ae51fe   Jens Axboe   blk-mq: new multi...
2847
  {
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2848
  	struct request_queue *uninit_q, *q;
c62b37d96   Christoph Hellwig   block: move ->mak...
2849
  	uninit_q = blk_alloc_queue(set->numa_node);
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2850
2851
  	if (!uninit_q)
  		return ERR_PTR(-ENOMEM);
2f227bb99   Christoph Hellwig   block: add a blk_...
2852
  	uninit_q->queuedata = queuedata;
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2853

737eb78e8   Damien Le Moal   block: Delay defa...
2854
2855
2856
2857
2858
  	/*
  	 * Initialize the queue without an elevator. device_add_disk() will do
  	 * the initialization.
  	 */
  	q = blk_mq_init_allocated_queue(set, uninit_q, false);
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2859
2860
2861
2862
2863
  	if (IS_ERR(q))
  		blk_cleanup_queue(uninit_q);
  
  	return q;
  }
2f227bb99   Christoph Hellwig   block: add a blk_...
2864
2865
2866
2867
2868
2869
  EXPORT_SYMBOL_GPL(blk_mq_init_queue_data);
  
  struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
  {
  	return blk_mq_init_queue_data(set, NULL);
  }
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2870
  EXPORT_SYMBOL(blk_mq_init_queue);
9316a9ed6   Jens Axboe   blk-mq: provide h...
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
  /*
   * Helper for setting up a queue with mq ops, given queue depth, and
   * the passed in mq ops flags.
   */
  struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
  					   const struct blk_mq_ops *ops,
  					   unsigned int queue_depth,
  					   unsigned int set_flags)
  {
  	struct request_queue *q;
  	int ret;
  
  	memset(set, 0, sizeof(*set));
  	set->ops = ops;
  	set->nr_hw_queues = 1;
b3c661b15   Jens Axboe   blk-mq: support m...
2886
  	set->nr_maps = 1;
9316a9ed6   Jens Axboe   blk-mq: provide h...
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
  	set->queue_depth = queue_depth;
  	set->numa_node = NUMA_NO_NODE;
  	set->flags = set_flags;
  
  	ret = blk_mq_alloc_tag_set(set);
  	if (ret)
  		return ERR_PTR(ret);
  
  	q = blk_mq_init_queue(set);
  	if (IS_ERR(q)) {
  		blk_mq_free_tag_set(set);
  		return q;
  	}
  
  	return q;
  }
  EXPORT_SYMBOL(blk_mq_init_sq_queue);
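  /*
   * Hedged usage sketch, not from this file: a hypothetical single-queue
   * driver (all mydrv_sq_* names are assumptions) could use the helper above
   * instead of open-coding blk_mq_alloc_tag_set() + blk_mq_init_queue().
   * The caller must still IS_ERR()-check the returned queue.
   */
  static blk_status_t mydrv_sq_queue_rq(struct blk_mq_hw_ctx *hctx,
  				      const struct blk_mq_queue_data *bd)
  {
  	/* a real driver would start the hardware command here */
  	blk_mq_start_request(bd->rq);
  	blk_mq_end_request(bd->rq, BLK_STS_OK);
  	return BLK_STS_OK;
  }
  
  static const struct blk_mq_ops mydrv_sq_mq_ops = {
  	.queue_rq	= mydrv_sq_queue_rq,
  };
  
  static struct blk_mq_tag_set mydrv_sq_tag_set;
  
  static struct request_queue *mydrv_sq_create_queue(void)
  {
  	return blk_mq_init_sq_queue(&mydrv_sq_tag_set, &mydrv_sq_mq_ops,
  				    64, BLK_MQ_F_SHOULD_MERGE);
  }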
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2904
2905
2906
2907
  static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
  		struct blk_mq_tag_set *set, struct request_queue *q,
  		int hctx_idx, int node)
  {
2f8f1336a   Ming Lei   blk-mq: always fr...
2908
  	struct blk_mq_hw_ctx *hctx = NULL, *tmp;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2909

2f8f1336a   Ming Lei   blk-mq: always fr...
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
  	/* reuse dead hctx first */
  	spin_lock(&q->unused_hctx_lock);
  	list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
  		if (tmp->numa_node == node) {
  			hctx = tmp;
  			break;
  		}
  	}
  	if (hctx)
  		list_del_init(&hctx->hctx_list);
  	spin_unlock(&q->unused_hctx_lock);
  
  	if (!hctx)
  		hctx = blk_mq_alloc_hctx(q, set, node);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2924
  	if (!hctx)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2925
  		goto fail;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2926

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2927
2928
  	if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
  		goto free_hctx;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2929
2930
  
  	return hctx;
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2931
2932
2933
2934
2935
  
   free_hctx:
  	kobject_put(&hctx->kobj);
   fail:
  	return NULL;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2936
  }
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2937
2938
  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
  						struct request_queue *q)
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2939
  {
e01ad46d5   Jianchao Wang   blk-mq: fallback ...
2940
  	int i, j, end;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2941
  	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
2942

ac0d6b926   Bart Van Assche   block: Reduce the...
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
  	if (q->nr_hw_queues < set->nr_hw_queues) {
  		struct blk_mq_hw_ctx **new_hctxs;
  
  		new_hctxs = kcalloc_node(set->nr_hw_queues,
  				       sizeof(*new_hctxs), GFP_KERNEL,
  				       set->numa_node);
  		if (!new_hctxs)
  			return;
  		if (hctxs)
  			memcpy(new_hctxs, hctxs, q->nr_hw_queues *
  			       sizeof(*hctxs));
  		q->queue_hw_ctx = new_hctxs;
ac0d6b926   Bart Van Assche   block: Reduce the...
2955
2956
2957
  		kfree(hctxs);
  		hctxs = new_hctxs;
  	}
fb350e0ad   Ming Lei   blk-mq: fix race ...
2958
2959
  	/* protect against switching io scheduler  */
  	mutex_lock(&q->sysfs_lock);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2960
  	for (i = 0; i < set->nr_hw_queues; i++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2961
  		int node;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2962
  		struct blk_mq_hw_ctx *hctx;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2963

7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2964
  		node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2965
2966
2967
2968
2969
2970
2971
  		/*
  		 * If the hw queue has been mapped to another numa node,
  		 * we need to realloc the hctx. If allocation fails, fallback
  		 * to use the previous one.
  		 */
  		if (hctxs[i] && (hctxs[i]->numa_node == node))
  			continue;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2972

34d11ffac   Jianchao Wang   blk-mq: realloc h...
2973
2974
  		hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
  		if (hctx) {
2f8f1336a   Ming Lei   blk-mq: always fr...
2975
  			if (hctxs[i])
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2976
  				blk_mq_exit_hctx(q, set, hctxs[i], i);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2977
2978
2979
2980
2981
2982
2983
2984
2985
  			hctxs[i] = hctx;
  		} else {
  			if (hctxs[i])
  				pr_warn("Allocate new hctx on node %d fails,\
  						fallback to previous one on node %d
  ",
  						node, hctxs[i]->numa_node);
  			else
  				break;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2986
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
2987
  	}
e01ad46d5   Jianchao Wang   blk-mq: fallback ...
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
  	/*
  	 * Increasing nr_hw_queues fails. Free the newly allocated
  	 * hctxs and keep the previous q->nr_hw_queues.
  	 */
  	if (i != set->nr_hw_queues) {
  		j = q->nr_hw_queues;
  		end = i;
  	} else {
  		j = i;
  		end = q->nr_hw_queues;
  		q->nr_hw_queues = set->nr_hw_queues;
  	}
34d11ffac   Jianchao Wang   blk-mq: realloc h...
3000

e01ad46d5   Jianchao Wang   blk-mq: fallback ...
3001
  	for (; j < end; j++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
3002
3003
3004
  		struct blk_mq_hw_ctx *hctx = hctxs[j];
  
  		if (hctx) {
cc71a6f43   Jens Axboe   blk-mq: abstract ...
3005
3006
  			if (hctx->tags)
  				blk_mq_free_map_and_requests(set, j);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
3007
  			blk_mq_exit_hctx(q, set, hctx, j);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
3008
  			hctxs[j] = NULL;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
3009
3010
  		}
  	}
fb350e0ad   Ming Lei   blk-mq: fix race ...
3011
  	mutex_unlock(&q->sysfs_lock);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
3012
3013
3014
  }
  
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q,
						  bool elevator_init)
{
	/* mark the queue as mq asap */
	q->mq_ops = set->ops;

	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
					     blk_mq_poll_stats_bkt,
					     BLK_MQ_POLL_STATS_BKTS, q);
	if (!q->poll_cb)
		goto err_exit;

	if (blk_mq_alloc_ctxs(q))
		goto err_poll;

	/* init q->mq_kobj and sw queues' kobjects */
	blk_mq_sysfs_init(q);

	INIT_LIST_HEAD(&q->unused_hctx_list);
	spin_lock_init(&q->unused_hctx_lock);

	blk_mq_realloc_hw_ctxs(set, q);
	if (!q->nr_hw_queues)
		goto err_hctxs;

	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);

	q->tag_set = set;

	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
	if (set->nr_maps > HCTX_TYPE_POLL &&
	    set->map[HCTX_TYPE_POLL].nr_queues)
		blk_queue_flag_set(QUEUE_FLAG_POLL, q);

	q->sg_reserved_size = INT_MAX;

	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
	INIT_LIST_HEAD(&q->requeue_list);
	spin_lock_init(&q->requeue_lock);

	q->nr_requests = set->queue_depth;

	/*
	 * Default to classic polling
	 */
	q->poll_nsec = BLK_MQ_POLL_CLASSIC;

	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
	blk_mq_add_queue_tag_set(set, q);
	blk_mq_map_swqueue(q);

	if (elevator_init)
		elevator_init_mq(q);

	return q;

err_hctxs:
	kfree(q->queue_hw_ctx);
	q->nr_hw_queues = 0;
	blk_mq_sysfs_deinit(q);
err_poll:
	blk_stat_free_callback(q->poll_cb);
	q->poll_cb = NULL;
err_exit:
	q->mq_ops = NULL;
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
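
/*
 * Illustrative sketch (not part of blk-mq itself): most drivers do not call
 * blk_mq_init_allocated_queue() directly but go through blk_mq_init_queue(),
 * roughly as in the hypothetical probe snippet below. All my_* identifiers
 * are placeholders.
 *
 *	struct request_queue *q;
 *
 *	q = blk_mq_init_queue(&my_tag_set);
 *	if (IS_ERR(q))
 *		return PTR_ERR(q);
 *	my_dev->queue = q;
 */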

/* tags can _not_ be used after returning from blk_mq_exit_queue */
void blk_mq_exit_queue(struct request_queue *q)
{
	struct blk_mq_tag_set	*set = q->tag_set;

	blk_mq_del_queue_tag_set(q);
	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
}

static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{
	int i;

	for (i = 0; i < set->nr_hw_queues; i++) {
		if (!__blk_mq_alloc_map_and_request(set, i))
			goto out_unwind;
		cond_resched();
	}

	return 0;

out_unwind:
	while (--i >= 0)
		blk_mq_free_map_and_requests(set, i);

	return -ENOMEM;
}
  
  /*
   * Allocate the request maps associated with this tag_set. Note that this
   * may reduce the depth asked for, if memory is tight. set->queue_depth
   * will be updated to reflect the allocated depth.
   */
static int blk_mq_alloc_map_and_requests(struct blk_mq_tag_set *set)
{
	unsigned int depth;
	int err;

	depth = set->queue_depth;
	do {
		err = __blk_mq_alloc_rq_maps(set);
		if (!err)
			break;

		set->queue_depth >>= 1;
		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
			err = -ENOMEM;
			break;
		}
	} while (set->queue_depth);

	if (!set->queue_depth || err) {
		pr_err("blk-mq: failed to allocate request map\n");
		return -ENOMEM;
	}

	if (depth != set->queue_depth)
		pr_info("blk-mq: reduced tag depth (%u -> %u)\n",
						depth, set->queue_depth);

	return 0;
}
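
/*
 * Worked example of the fallback above: with reserved_tags == 0 and a
 * requested queue_depth of 1024, allocation is retried at 512, 256, ...
 * until __blk_mq_alloc_rq_maps() succeeds, or the depth would fall below
 * reserved_tags + BLK_MQ_TAG_MIN, at which point -ENOMEM is returned.
 */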

static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
{
	/*
	 * blk_mq_map_queues() and multiple .map_queues() implementations
	 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
	 * number of hardware queues.
	 */
	if (set->nr_maps == 1)
		set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;

	if (set->ops->map_queues && !is_kdump_kernel()) {
		int i;

		/*
		 * transport .map_queues is usually done in the following
		 * way:
		 *
		 * for (queue = 0; queue < set->nr_hw_queues; queue++) {
		 * 	mask = get_cpu_mask(queue)
		 * 	for_each_cpu(cpu, mask)
		 * 		set->map[x].mq_map[cpu] = queue;
		 * }
		 *
		 * When we need to remap, the table has to be cleared to
		 * kill stale mappings, since a CPU may end up not being
		 * mapped to any hw queue.
		 */
		for (i = 0; i < set->nr_maps; i++)
			blk_mq_clear_mq_map(&set->map[i]);

		return set->ops->map_queues(set);
	} else {
		BUG_ON(set->nr_maps > 1);
		return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
	}
}

static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
				  int cur_nr_hw_queues, int new_nr_hw_queues)
{
	struct blk_mq_tags **new_tags;

	if (cur_nr_hw_queues >= new_nr_hw_queues)
		return 0;

	new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
				GFP_KERNEL, set->numa_node);
	if (!new_tags)
		return -ENOMEM;

	if (set->tags)
		memcpy(new_tags, set->tags, cur_nr_hw_queues *
		       sizeof(*set->tags));
	kfree(set->tags);
	set->tags = new_tags;
	set->nr_hw_queues = new_nr_hw_queues;

	return 0;
}

/*
 * Alloc a tag set to be associated with one or more request queues.
 * May fail with EINVAL for various error conditions. May adjust the
 * requested depth down, if it's too large. In that case, the set
 * value will be stored in set->queue_depth.
 */
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
	int i, ret;

	BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);

	if (!set->nr_hw_queues)
		return -EINVAL;
	if (!set->queue_depth)
		return -EINVAL;
	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
		return -EINVAL;

	if (!set->ops->queue_rq)
		return -EINVAL;

	if (!set->ops->get_budget ^ !set->ops->put_budget)
		return -EINVAL;

	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
		pr_info("blk-mq: reduced tag depth to %u\n",
			BLK_MQ_MAX_DEPTH);
		set->queue_depth = BLK_MQ_MAX_DEPTH;
	}

	if (!set->nr_maps)
		set->nr_maps = 1;
	else if (set->nr_maps > HCTX_MAX_TYPES)
		return -EINVAL;

	/*
	 * If a crashdump is active, then we are potentially in a very
	 * memory constrained environment. Limit us to 1 queue and
	 * 64 tags to prevent using too much memory.
	 */
	if (is_kdump_kernel()) {
		set->nr_hw_queues = 1;
		set->nr_maps = 1;
		set->queue_depth = min(64U, set->queue_depth);
	}
	/*
	 * There is no use for more h/w queues than cpus if we just have
	 * a single map
	 */
	if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
		set->nr_hw_queues = nr_cpu_ids;

	if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
		return -ENOMEM;

	ret = -ENOMEM;
	for (i = 0; i < set->nr_maps; i++) {
		set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
						  sizeof(set->map[i].mq_map[0]),
						  GFP_KERNEL, set->numa_node);
		if (!set->map[i].mq_map)
			goto out_free_mq_map;
		set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
	}

	ret = blk_mq_update_queue_map(set);
	if (ret)
		goto out_free_mq_map;

	ret = blk_mq_alloc_map_and_requests(set);
	if (ret)
		goto out_free_mq_map;

	if (blk_mq_is_sbitmap_shared(set->flags)) {
		atomic_set(&set->active_queues_shared_sbitmap, 0);

		if (blk_mq_init_shared_sbitmap(set, set->flags)) {
			ret = -ENOMEM;
			goto out_free_mq_rq_maps;
		}
	}

	mutex_init(&set->tag_list_lock);
	INIT_LIST_HEAD(&set->tag_list);

	return 0;

out_free_mq_rq_maps:
	for (i = 0; i < set->nr_hw_queues; i++)
		blk_mq_free_map_and_requests(set, i);
out_free_mq_map:
	for (i = 0; i < set->nr_maps; i++) {
		kfree(set->map[i].mq_map);
		set->map[i].mq_map = NULL;
	}
	kfree(set->tags);
	set->tags = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
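
/*
 * Illustrative sketch (not part of blk-mq itself): a minimal tag set as a
 * driver might configure it before calling blk_mq_alloc_tag_set(). All
 * my_* identifiers are hypothetical.
 *
 *	memset(&my_tag_set, 0, sizeof(my_tag_set));
 *	my_tag_set.ops = &my_mq_ops;		(must provide .queue_rq)
 *	my_tag_set.nr_hw_queues = 1;
 *	my_tag_set.queue_depth = 64;
 *	my_tag_set.numa_node = NUMA_NO_NODE;
 *	my_tag_set.cmd_size = sizeof(struct my_cmd);	(per-request driver data)
 *	my_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 *
 *	ret = blk_mq_alloc_tag_set(&my_tag_set);
 *	if (ret)
 *		return ret;
 */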
  
  void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
  {
	int i, j;

	for (i = 0; i < set->nr_hw_queues; i++)
		blk_mq_free_map_and_requests(set, i);

	if (blk_mq_is_sbitmap_shared(set->flags))
		blk_mq_exit_shared_sbitmap(set);

	for (j = 0; j < set->nr_maps; j++) {
		kfree(set->map[j].mq_map);
		set->map[j].mq_map = NULL;
	}

	kfree(set->tags);
	set->tags = NULL;
}
EXPORT_SYMBOL(blk_mq_free_tag_set);

int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
{
	struct blk_mq_tag_set *set = q->tag_set;
	struct blk_mq_hw_ctx *hctx;
	int i, ret;

	if (!set)
		return -EINVAL;

	if (q->nr_requests == nr)
		return 0;

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	ret = 0;
	queue_for_each_hw_ctx(q, hctx, i) {
		if (!hctx->tags)
			continue;
		/*
		 * If we're using an MQ scheduler, just update the scheduler
		 * queue depth. This is similar to what the old code would do.
		 */
		if (!hctx->sched_tags) {
			ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
							false);
			if (!ret && blk_mq_is_sbitmap_shared(set->flags))
				blk_mq_tag_resize_shared_sbitmap(set, nr);
		} else {
			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
							nr, true);
		}
		if (ret)
			break;
		if (q->elevator && q->elevator->type->ops.depth_updated)
			q->elevator->type->ops.depth_updated(hctx);
	}

	if (!ret)
		q->nr_requests = nr;

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return ret;
}
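
/*
 * Note: this resize path is typically reached via writes to the sysfs
 * attribute /sys/block/<disk>/queue/nr_requests. The queue is frozen and
 * quiesced for the duration of the update, so in-flight I/O is drained
 * before the tag depth changes.
 */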
  /*
   * request_queue and elevator_type pair.
   * It is just used by __blk_mq_update_nr_hw_queues to cache
   * the elevator_type associated with a request_queue.
   */
  struct blk_mq_qe_pair {
  	struct list_head node;
  	struct request_queue *q;
  	struct elevator_type *type;
  };
  
  /*
   * Cache the elevator_type in qe pair list and switch the
   * io scheduler to 'none'
   */
  static bool blk_mq_elv_switch_none(struct list_head *head,
  		struct request_queue *q)
  {
  	struct blk_mq_qe_pair *qe;
  
  	if (!q->elevator)
  		return true;
  
  	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
  	if (!qe)
  		return false;
  
  	INIT_LIST_HEAD(&qe->node);
  	qe->q = q;
  	qe->type = q->elevator->type;
  	list_add(&qe->node, head);
  
  	mutex_lock(&q->sysfs_lock);
  	/*
  	 * After elevator_switch_mq, the previous elevator_queue will be
  	 * released by elevator_release. The reference of the io scheduler
	 * module taken by elevator_get will also be put. So we need to take
	 * a reference of the io scheduler module here to prevent it from
	 * being removed.
  	 */
  	__module_get(qe->type->elevator_owner);
  	elevator_switch_mq(q, NULL);
  	mutex_unlock(&q->sysfs_lock);
  
  	return true;
  }
  
  static void blk_mq_elv_switch_back(struct list_head *head,
  		struct request_queue *q)
  {
  	struct blk_mq_qe_pair *qe;
  	struct elevator_type *t = NULL;
  
  	list_for_each_entry(qe, head, node)
  		if (qe->q == q) {
  			t = qe->type;
  			break;
  		}
  
  	if (!t)
  		return;
  
  	list_del(&qe->node);
  	kfree(qe);
  
  	mutex_lock(&q->sysfs_lock);
  	elevator_switch_mq(q, t);
  	mutex_unlock(&q->sysfs_lock);
  }

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
							int nr_hw_queues)
{
	struct request_queue *q;
	LIST_HEAD(head);
	int prev_nr_hw_queues;

	lockdep_assert_held(&set->tag_list_lock);

	if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids)
		nr_hw_queues = nr_cpu_ids;
	if (nr_hw_queues < 1)
		return;
	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
		return;

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue(q);
	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
	 * updating the new sw to hw queue mappings.
	 */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		if (!blk_mq_elv_switch_none(&head, q))
			goto switch_back;

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_debugfs_unregister_hctxs(q);
		blk_mq_sysfs_unregister(q);
	}

	prev_nr_hw_queues = set->nr_hw_queues;
	if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
	    0)
		goto reregister;

	set->nr_hw_queues = nr_hw_queues;
fallback:
	blk_mq_update_queue_map(set);
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_realloc_hw_ctxs(set, q);
		if (q->nr_hw_queues != set->nr_hw_queues) {
			pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
					nr_hw_queues, prev_nr_hw_queues);
			set->nr_hw_queues = prev_nr_hw_queues;
			blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
			goto fallback;
		}
		blk_mq_map_swqueue(q);
	}

reregister:
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_sysfs_register(q);
		blk_mq_debugfs_register_hctxs(q);
	}

switch_back:
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_elv_switch_back(&head, q);

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_unfreeze_queue(q);
}

void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
{
	mutex_lock(&set->tag_list_lock);
	__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
	mutex_unlock(&set->tag_list_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
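
/*
 * Illustrative sketch (not part of blk-mq itself): a driver whose interrupt
 * vector count changed, e.g. across a controller reset, might refresh its
 * queue count roughly as below. my_nr_irq_vectors and my_tag_set are
 * hypothetical names.
 *
 *	nr = min_t(unsigned int, my_nr_irq_vectors, num_online_cpus());
 *	blk_mq_update_nr_hw_queues(&my_tag_set, nr);
 */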

/* Enable polling stats and return whether they were already enabled. */
static bool blk_poll_stats_enable(struct request_queue *q)
{
	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
	    blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
		return true;
	blk_stat_add_callback(q, q->poll_cb);
	return false;
}

static void blk_mq_poll_stats_start(struct request_queue *q)
{
	/*
	 * We don't arm the callback if polling stats are not enabled or the
	 * callback is already active.
	 */
	if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
	    blk_stat_is_active(q->poll_cb))
		return;

	blk_stat_activate_msecs(q->poll_cb, 100);
}

static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
{
	struct request_queue *q = cb->data;
	int bucket;

	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
		if (cb->stat[bucket].nr_samples)
			q->poll_stat[bucket] = cb->stat[bucket];
	}
}

static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
				       struct request *rq)
{
	unsigned long ret = 0;
	int bucket;

	/*
	 * If stats collection isn't on, don't sleep but turn it on for
	 * future users
	 */
	if (!blk_poll_stats_enable(q))
		return 0;

	/*
	 * As an optimistic guess, use half of the mean service time
	 * for this type of request. We can (and should) make this smarter.
	 * For instance, if the completion latencies are tight, we can
	 * get closer than just half the mean. This is especially
	 * important on devices where the completion latencies are longer
	 * than ~10 usec. We do use the stats for the relevant IO size
	 * if available which does lead to better estimates.
	 */
	bucket = blk_mq_poll_stats_bkt(rq);
	if (bucket < 0)
		return ret;

	if (q->poll_stat[bucket].nr_samples)
		ret = (q->poll_stat[bucket].mean + 1) / 2;

	return ret;
}
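
/*
 * Worked example: if the mean completion time recorded for this request's
 * size/direction bucket is 8000ns, the suggested hybrid-poll sleep becomes
 * (8000 + 1) / 2 = 4000ns, i.e. roughly half the expected latency.
 */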

static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
				     struct request *rq)
{
	struct hrtimer_sleeper hs;
	enum hrtimer_mode mode;
	unsigned int nsecs;
	ktime_t kt;

	if (rq->rq_flags & RQF_MQ_POLL_SLEPT)
		return false;

	/*
	 * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
	 *
	 *  0:	use half of prev avg
	 * >0:	use this specific value
	 */
	if (q->poll_nsec > 0)
		nsecs = q->poll_nsec;
	else
		nsecs = blk_mq_poll_nsecs(q, rq);

	if (!nsecs)
		return false;

	rq->rq_flags |= RQF_MQ_POLL_SLEPT;

	/*
	 * This will be replaced with the stats tracking code, using
	 * 'avg_completion_time / 2' as the pre-sleep target.
	 */
	kt = nsecs;

	mode = HRTIMER_MODE_REL;
	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
	hrtimer_set_expires(&hs.timer, kt);

	do {
		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
			break;
		set_current_state(TASK_UNINTERRUPTIBLE);
		hrtimer_sleeper_start_expires(&hs, mode);
		if (hs.task)
			io_schedule();
		hrtimer_cancel(&hs.timer);
		mode = HRTIMER_MODE_ABS;
	} while (hs.task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&hs.timer);
	return true;
}

static bool blk_mq_poll_hybrid(struct request_queue *q,
			       struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
{
	struct request *rq;

	if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
		return false;

	if (!blk_qc_t_is_internal(cookie))
		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
	else {
		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
		/*
		 * With scheduling, if the request has completed, we'll
		 * get a NULL return here, as we clear the sched tag when
		 * that happens. The request still remains valid, like always,
		 * so we should be safe with just the NULL check.
		 */
		if (!rq)
			return false;
	}

	return blk_mq_poll_hybrid_sleep(q, rq);
}

/**
 * blk_poll - poll for IO completions
 * @q:  the queue
 * @cookie: cookie passed back at IO submission time
 * @spin: whether to spin for completions
 *
 * Description:
 *    Poll for completions on the passed in queue. Returns number of
 *    completed entries found. If @spin is true, then blk_poll will continue
 *    looping until at least one completion is found, unless the task is
 *    otherwise marked running (or we need to reschedule).
 */
int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
{
	struct blk_mq_hw_ctx *hctx;
	long state;

	if (!blk_qc_t_valid(cookie) ||
	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		return 0;

	if (current->plug)
		blk_flush_plug_list(current->plug, false);

	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];

	/*
	 * If we sleep, have the caller restart the poll loop to reset
	 * the state. Like for the other success return cases, the
	 * caller is responsible for checking if the IO completed. If
	 * the IO isn't complete, we'll get called again and will go
	 * straight to the busy poll loop.
	 */
	if (blk_mq_poll_hybrid(q, hctx, cookie))
		return 1;

	hctx->poll_considered++;

	state = current->state;
	do {
		int ret;

		hctx->poll_invoked++;

		ret = q->mq_ops->poll(hctx);
		if (ret > 0) {
			hctx->poll_success++;
			__set_current_state(TASK_RUNNING);
			return ret;
		}

		if (signal_pending_state(state, current))
			__set_current_state(TASK_RUNNING);

		if (current->state == TASK_RUNNING)
			return 1;
		if (ret < 0 || !spin)
			break;
		cpu_relax();
	} while (!need_resched());

	__set_current_state(TASK_RUNNING);
	return 0;
}
EXPORT_SYMBOL_GPL(blk_poll);
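
/*
 * Illustrative sketch (not part of blk-mq itself): a typical caller, such as
 * a direct I/O completion path, polls in a loop roughly like the hypothetical
 * snippet below, using the cookie returned at submission time. my_done is a
 * placeholder completion flag set by the bio end_io callback.
 *
 *	qc = submit_bio(bio);
 *	while (!READ_ONCE(my_done)) {
 *		if (!blk_poll(q, qc, true))
 *			io_schedule();
 *	}
 */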

unsigned int blk_mq_rq_cpu(struct request *rq)
{
	return rq->mq_ctx->cpu;
}
EXPORT_SYMBOL(blk_mq_rq_cpu);

static int __init blk_mq_init(void)
{
	int i;

	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);

	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
				  "block/softirq:dead", NULL,
				  blk_softirq_cpu_dead);
	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
				blk_mq_hctx_notify_dead);
	cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
				blk_mq_hctx_notify_online,
				blk_mq_hctx_notify_offline);
	return 0;
}
subsys_initcall(blk_mq_init);