block/blk-mq.c (blame view)
3dcf60bcb   Christoph Hellwig   block: add SPDX t...
1
  // SPDX-License-Identifier: GPL-2.0
75bb4625b   Jens Axboe   blk-mq: add file ...
2
3
4
5
6
7
  /*
   * Block multiqueue core code
   *
   * Copyright (C) 2013-2014 Jens Axboe
   * Copyright (C) 2013-2014 Christoph Hellwig
   */
320ae51fe   Jens Axboe   blk-mq: new multi...
8
9
10
11
12
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
f75782e4e   Catalin Marinas   block: kmemleak: ...
13
  #include <linux/kmemleak.h>
320ae51fe   Jens Axboe   blk-mq: new multi...
14
15
16
17
18
19
20
21
22
23
  #include <linux/mm.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>
  #include <linux/smp.h>
  #include <linux/llist.h>
  #include <linux/list_sort.h>
  #include <linux/cpu.h>
  #include <linux/cache.h>
  #include <linux/sched/sysctl.h>
105ab3d8c   Ingo Molnar   sched/headers: Pr...
24
  #include <linux/sched/topology.h>
174cd4b1e   Ingo Molnar   sched/headers: Pr...
25
  #include <linux/sched/signal.h>
320ae51fe   Jens Axboe   blk-mq: new multi...
26
  #include <linux/delay.h>
aedcd72f6   Jens Axboe   blk-mq: limit mem...
27
  #include <linux/crash_dump.h>
88c7b2b75   Jens Axboe   blk-mq: prefetch ...
28
  #include <linux/prefetch.h>
320ae51fe   Jens Axboe   blk-mq: new multi...
29
30
31
32
  
  #include <trace/events/block.h>
  
  #include <linux/blk-mq.h>
54d4e6ab9   Max Gurtovoy   block: centralize...
33
  #include <linux/t10-pi.h>
320ae51fe   Jens Axboe   blk-mq: new multi...
34
35
  #include "blk.h"
  #include "blk-mq.h"
9c1051aac   Omar Sandoval   blk-mq: untangle ...
36
  #include "blk-mq-debugfs.h"
320ae51fe   Jens Axboe   blk-mq: new multi...
37
  #include "blk-mq-tag.h"
986d413b7   Bart Van Assche   blk-mq: Enable su...
38
  #include "blk-pm.h"
cf43e6be8   Jens Axboe   block: add scalab...
39
  #include "blk-stat.h"
bd166ef18   Jens Axboe   blk-mq-sched: add...
40
  #include "blk-mq-sched.h"
c1c80384c   Josef Bacik   block: remove ext...
41
  #include "blk-rq-qos.h"
320ae51fe   Jens Axboe   blk-mq: new multi...
42

34dbad5d2   Omar Sandoval   blk-stat: convert...
43
44
  static void blk_mq_poll_stats_start(struct request_queue *q);
  static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
720b8ccc4   Stephen Bates   blk-mq: Add a pol...
45
46
  static int blk_mq_poll_stats_bkt(const struct request *rq)
  {
3d2443069   Hou Tao   block: make rq se...
47
  	int ddir, sectors, bucket;
720b8ccc4   Stephen Bates   blk-mq: Add a pol...
48

99c749a4c   Jens Axboe   blk-stat: kill bl...
49
  	ddir = rq_data_dir(rq);
3d2443069   Hou Tao   block: make rq se...
50
  	sectors = blk_rq_stats_sectors(rq);
720b8ccc4   Stephen Bates   blk-mq: Add a pol...
51

3d2443069   Hou Tao   block: make rq se...
52
  	bucket = ddir + 2 * ilog2(sectors);
720b8ccc4   Stephen Bates   blk-mq: Add a pol...
53
54
55
56
57
58
59
60
  
  	if (bucket < 0)
  		return -1;
  	else if (bucket >= BLK_MQ_POLL_STATS_BKTS)
  		return ddir + BLK_MQ_POLL_STATS_BKTS - 2;
  
  	return bucket;
  }
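
/*
 * Illustrative note, not part of blk-mq.c: with the bucketing above,
 * bucket = ddir + 2 * ilog2(sectors), so read/write requests of the same
 * size class land in adjacent buckets.  For example, a 32-sector (16KB)
 * READ gives 0 + 2 * ilog2(32) = 10 and the matching WRITE gives 11,
 * assuming BLK_MQ_POLL_STATS_BKTS is large enough; anything bigger
 * collapses into the last read/write pair.
 */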
320ae51fe   Jens Axboe   blk-mq: new multi...
61
  /*
85fae294e   Yufen Yu   blk-mq: update co...
62
63
 * Check if any of the ctx, the dispatch list or the elevator
 * has pending work in this hardware queue.
320ae51fe   Jens Axboe   blk-mq: new multi...
64
   */
79f720a75   Jens Axboe   blk-mq: only run ...
65
  static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
320ae51fe   Jens Axboe   blk-mq: new multi...
66
  {
79f720a75   Jens Axboe   blk-mq: only run ...
67
68
  	return !list_empty_careful(&hctx->dispatch) ||
  		sbitmap_any_bit_set(&hctx->ctx_map) ||
bd166ef18   Jens Axboe   blk-mq-sched: add...
69
  			blk_mq_sched_has_work(hctx);
1429d7c94   Jens Axboe   blk-mq: switch ct...
70
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
71
72
73
74
75
76
  /*
   * Mark this ctx as having pending work in this hardware queue
   */
  static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
  				     struct blk_mq_ctx *ctx)
  {
f31967f0e   Jens Axboe   blk-mq: allow sof...
77
78
79
80
  	const int bit = ctx->index_hw[hctx->type];
  
  	if (!sbitmap_test_bit(&hctx->ctx_map, bit))
  		sbitmap_set_bit(&hctx->ctx_map, bit);
1429d7c94   Jens Axboe   blk-mq: switch ct...
81
82
83
84
85
  }
  
  static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
  				      struct blk_mq_ctx *ctx)
  {
f31967f0e   Jens Axboe   blk-mq: allow sof...
86
87
88
  	const int bit = ctx->index_hw[hctx->type];
  
  	sbitmap_clear_bit(&hctx->ctx_map, bit);
320ae51fe   Jens Axboe   blk-mq: new multi...
89
  }
f299b7c7a   Jens Axboe   blk-mq: provide i...
90
91
92
93
  struct mq_inflight {
  	struct hd_struct *part;
  	unsigned int *inflight;
  };
7baa85727   Jens Axboe   blk-mq-tag: chang...
94
  static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
f299b7c7a   Jens Axboe   blk-mq: provide i...
95
96
97
98
  				  struct request *rq, void *priv,
  				  bool reserved)
  {
  	struct mq_inflight *mi = priv;
6131837b1   Omar Sandoval   blk-mq: count all...
99
  	/*
e016b7820   Mikulas Patocka   block: return jus...
100
  	 * index[0] counts the specific partition that was asked for.
6131837b1   Omar Sandoval   blk-mq: count all...
101
102
103
  	 */
  	if (rq->part == mi->part)
  		mi->inflight[0]++;
7baa85727   Jens Axboe   blk-mq-tag: chang...
104
105
  
  	return true;
f299b7c7a   Jens Axboe   blk-mq: provide i...
106
  }
e016b7820   Mikulas Patocka   block: return jus...
107
  unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
f299b7c7a   Jens Axboe   blk-mq: provide i...
108
  {
e016b7820   Mikulas Patocka   block: return jus...
109
  	unsigned inflight[2];
f299b7c7a   Jens Axboe   blk-mq: provide i...
110
  	struct mq_inflight mi = { .part = part, .inflight = inflight, };
b8d62b3a9   Jens Axboe   blk-mq: enable ch...
111
  	inflight[0] = inflight[1] = 0;
f299b7c7a   Jens Axboe   blk-mq: provide i...
112
  	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
e016b7820   Mikulas Patocka   block: return jus...
113
114
  
  	return inflight[0];
f299b7c7a   Jens Axboe   blk-mq: provide i...
115
  }
7baa85727   Jens Axboe   blk-mq-tag: chang...
116
  static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
bf0ddaba6   Omar Sandoval   blk-mq: fix sysfs...
117
118
119
120
121
122
123
  				     struct request *rq, void *priv,
  				     bool reserved)
  {
  	struct mq_inflight *mi = priv;
  
  	if (rq->part == mi->part)
  		mi->inflight[rq_data_dir(rq)]++;
7baa85727   Jens Axboe   blk-mq-tag: chang...
124
125
  
  	return true;
bf0ddaba6   Omar Sandoval   blk-mq: fix sysfs...
126
127
128
129
130
131
132
133
134
135
  }
  
  void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
  			 unsigned int inflight[2])
  {
  	struct mq_inflight mi = { .part = part, .inflight = inflight, };
  
  	inflight[0] = inflight[1] = 0;
  	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
  }
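
/*
 * Illustrative sketch, not part of blk-mq.c: the same tag-iterator
 * pattern used by the inflight helpers above can back other per-queue
 * accounting.  The names demo_count_one() and demo_count_inflight()
 * are hypothetical.
 */
static bool demo_count_one(struct blk_mq_hw_ctx *hctx, struct request *rq,
			   void *priv, bool reserved)
{
	unsigned int *count = priv;

	(*count)++;		/* count every started request we are shown */
	return true;		/* keep iterating */
}

static unsigned int demo_count_inflight(struct request_queue *q)
{
	unsigned int count = 0;

	blk_mq_queue_tag_busy_iter(q, demo_count_one, &count);
	return count;
}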
1671d522c   Ming Lei   block: rename blk...
136
  void blk_freeze_queue_start(struct request_queue *q)
43a5e4e21   Ming Lei   block: blk-mq: su...
137
  {
7996a8b55   Bob Liu   blk-mq: fix hang ...
138
139
  	mutex_lock(&q->mq_freeze_lock);
  	if (++q->mq_freeze_depth == 1) {
3ef28e83a   Dan Williams   block: generic re...
140
  		percpu_ref_kill(&q->q_usage_counter);
7996a8b55   Bob Liu   blk-mq: fix hang ...
141
  		mutex_unlock(&q->mq_freeze_lock);
344e9ffcb   Jens Axboe   block: add queue_...
142
  		if (queue_is_mq(q))
055f6e18e   Ming Lei   block: Make q_usa...
143
  			blk_mq_run_hw_queues(q, false);
7996a8b55   Bob Liu   blk-mq: fix hang ...
144
145
  	} else {
  		mutex_unlock(&q->mq_freeze_lock);
cddd5d176   Tejun Heo   blk-mq: blk_mq_fr...
146
  	}
f3af020b9   Tejun Heo   blk-mq: make mq_q...
147
  }
1671d522c   Ming Lei   block: rename blk...
148
  EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
149

6bae363ee   Keith Busch   blk-mq: Export bl...
150
  void blk_mq_freeze_queue_wait(struct request_queue *q)
f3af020b9   Tejun Heo   blk-mq: make mq_q...
151
  {
3ef28e83a   Dan Williams   block: generic re...
152
  	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
43a5e4e21   Ming Lei   block: blk-mq: su...
153
  }
6bae363ee   Keith Busch   blk-mq: Export bl...
154
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
43a5e4e21   Ming Lei   block: blk-mq: su...
155

f91328c40   Keith Busch   blk-mq: Provide f...
156
157
158
159
160
161
162
163
  int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
  				     unsigned long timeout)
  {
  	return wait_event_timeout(q->mq_freeze_wq,
  					percpu_ref_is_zero(&q->q_usage_counter),
  					timeout);
  }
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
43a5e4e21   Ming Lei   block: blk-mq: su...
164

f3af020b9   Tejun Heo   blk-mq: make mq_q...
165
166
167
168
  /*
   * Guarantee no request is in use, so we can change any data structure of
   * the queue afterward.
   */
3ef28e83a   Dan Williams   block: generic re...
169
  void blk_freeze_queue(struct request_queue *q)
f3af020b9   Tejun Heo   blk-mq: make mq_q...
170
  {
3ef28e83a   Dan Williams   block: generic re...
171
172
173
174
175
176
177
  	/*
  	 * In the !blk_mq case we are only calling this to kill the
  	 * q_usage_counter, otherwise this increases the freeze depth
  	 * and waits for it to return to zero.  For this reason there is
  	 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
  	 * exported to drivers as the only user for unfreeze is blk_mq.
  	 */
1671d522c   Ming Lei   block: rename blk...
178
  	blk_freeze_queue_start(q);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
179
180
  	blk_mq_freeze_queue_wait(q);
  }
3ef28e83a   Dan Williams   block: generic re...
181
182
183
184
185
186
187
188
189
  
  void blk_mq_freeze_queue(struct request_queue *q)
  {
  	/*
  	 * ...just an alias to keep freeze and unfreeze actions balanced
  	 * in the blk_mq_* namespace
  	 */
  	blk_freeze_queue(q);
  }
c761d96b0   Jens Axboe   blk-mq: export bl...
190
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
191

b4c6a0287   Keith Busch   blk-mq: Export fr...
192
  void blk_mq_unfreeze_queue(struct request_queue *q)
320ae51fe   Jens Axboe   blk-mq: new multi...
193
  {
7996a8b55   Bob Liu   blk-mq: fix hang ...
194
195
196
197
  	mutex_lock(&q->mq_freeze_lock);
  	q->mq_freeze_depth--;
  	WARN_ON_ONCE(q->mq_freeze_depth < 0);
  	if (!q->mq_freeze_depth) {
bdd631609   Bart Van Assche   block: Allow unfr...
198
  		percpu_ref_resurrect(&q->q_usage_counter);
320ae51fe   Jens Axboe   blk-mq: new multi...
199
  		wake_up_all(&q->mq_freeze_wq);
add703fda   Tejun Heo   blk-mq: use percp...
200
  	}
7996a8b55   Bob Liu   blk-mq: fix hang ...
201
  	mutex_unlock(&q->mq_freeze_lock);
320ae51fe   Jens Axboe   blk-mq: new multi...
202
  }
b4c6a0287   Keith Busch   blk-mq: Export fr...
203
  EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
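
/*
 * Illustrative usage sketch, not part of blk-mq.c: the usual pattern for
 * callers that must change queue state with no requests in flight.  The
 * update step, demo_update_limits(), is a hypothetical placeholder.
 */
static void demo_frozen_update(struct request_queue *q)
{
	blk_mq_freeze_queue(q);		/* block new requests and drain old ones */
	demo_update_limits(q);		/* hypothetical: mutate queue state */
	blk_mq_unfreeze_queue(q);	/* resume normal operation */
}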
320ae51fe   Jens Axboe   blk-mq: new multi...
204

852ec8098   Bart Van Assche   blk-mq: Make it s...
205
206
207
208
209
210
  /*
   * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
   * mpt3sas driver such that this function can be removed.
   */
  void blk_mq_quiesce_queue_nowait(struct request_queue *q)
  {
8814ce8a0   Bart Van Assche   block: Introduce ...
211
  	blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
852ec8098   Bart Van Assche   blk-mq: Make it s...
212
213
  }
  EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
214
  /**
69e07c4ad   Ming Lei   blk-mq: update co...
215
   * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
216
217
218
   * @q: request queue.
   *
 * Note: this function does not prevent the struct request end_io()
69e07c4ad   Ming Lei   blk-mq: update co...
219
220
221
 * callback from being invoked. Once this function has returned, we make
   * sure no dispatch can happen until the queue is unquiesced via
   * blk_mq_unquiesce_queue().
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
222
223
224
225
226
227
   */
  void blk_mq_quiesce_queue(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  	bool rcu = false;
1d9e9bc6b   Ming Lei   blk-mq: don't sto...
228
  	blk_mq_quiesce_queue_nowait(q);
f4560ffe8   Ming Lei   blk-mq: use QUEUE...
229

6a83e74d2   Bart Van Assche   blk-mq: Introduce...
230
231
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (hctx->flags & BLK_MQ_F_BLOCKING)
05707b64a   Tejun Heo   blk-mq: rename bl...
232
  			synchronize_srcu(hctx->srcu);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
233
234
235
236
237
238
239
  		else
  			rcu = true;
  	}
  	if (rcu)
  		synchronize_rcu();
  }
  EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
e4e739131   Ming Lei   blk-mq: introduce...
240
241
242
243
244
245
246
247
248
  /*
   * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
   * @q: request queue.
   *
 * This function restores the queue to the state it was in before it
 * was quiesced by blk_mq_quiesce_queue().
   */
  void blk_mq_unquiesce_queue(struct request_queue *q)
  {
8814ce8a0   Bart Van Assche   block: Introduce ...
249
  	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
f4560ffe8   Ming Lei   blk-mq: use QUEUE...
250

1d9e9bc6b   Ming Lei   blk-mq: don't sto...
251
252
  	/* dispatch requests which are inserted during quiescing */
  	blk_mq_run_hw_queues(q, true);
e4e739131   Ming Lei   blk-mq: introduce...
253
254
  }
  EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
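
/*
 * Illustrative usage sketch, not part of blk-mq.c: quiescing only stops
 * new dispatches (unlike a freeze, it does not drain requests that are
 * already in flight), which is enough when only dispatch-side state is
 * being swapped.  demo_swap_dispatch_state() is hypothetical.
 */
static void demo_quiesced_update(struct request_queue *q)
{
	blk_mq_quiesce_queue(q);	/* wait for ongoing ->queue_rq() calls */
	demo_swap_dispatch_state(q);	/* hypothetical: update dispatch-side data */
	blk_mq_unquiesce_queue(q);	/* allow and kick dispatch again */
}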
aed3ea94b   Jens Axboe   block: wake up wa...
255
256
257
258
259
260
261
262
263
  void blk_mq_wake_waiters(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		if (blk_mq_hw_queue_mapped(hctx))
  			blk_mq_tag_wakeup_all(hctx->tags, true);
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
264
265
266
267
268
  bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  {
  	return blk_mq_has_free_tags(hctx->tags);
  }
  EXPORT_SYMBOL(blk_mq_can_queue);
fe1f45264   Jens Axboe   blk-mq: don't cal...
269
  /*
9a91b05bb   Hou Tao   block: also check...
270
271
   * Only need start/end time stamping if we have iostat or
   * blk stats enabled, or using an IO scheduler.
fe1f45264   Jens Axboe   blk-mq: don't cal...
272
273
274
   */
  static inline bool blk_mq_need_time_stamp(struct request *rq)
  {
9a91b05bb   Hou Tao   block: also check...
275
  	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
fe1f45264   Jens Axboe   blk-mq: don't cal...
276
  }
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
277
  static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
6f816b4b7   Tejun Heo   blk-mq: add optio...
278
  		unsigned int tag, unsigned int op, u64 alloc_time_ns)
320ae51fe   Jens Axboe   blk-mq: new multi...
279
  {
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
280
281
  	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
  	struct request *rq = tags->static_rqs[tag];
bf9ae8c53   Jens Axboe   blk-mq: fix bad c...
282
  	req_flags_t rq_flags = 0;
c3a148d20   Bart Van Assche   blk-mq: Initializ...
283

e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
284
285
286
287
  	if (data->flags & BLK_MQ_REQ_INTERNAL) {
  		rq->tag = -1;
  		rq->internal_tag = tag;
  	} else {
d263ed992   Jianchao Wang   blk-mq: count the...
288
  		if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
bf9ae8c53   Jens Axboe   blk-mq: fix bad c...
289
  			rq_flags = RQF_MQ_INFLIGHT;
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
290
291
292
293
294
295
  			atomic_inc(&data->hctx->nr_active);
  		}
  		rq->tag = tag;
  		rq->internal_tag = -1;
  		data->hctx->tags->rqs[rq->tag] = rq;
  	}
af76e555e   Christoph Hellwig   blk-mq: initializ...
296
  	/* csd/requeue_work/fifo_time is initialized before use */
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
297
298
  	rq->q = data->q;
  	rq->mq_ctx = data->ctx;
ea4f995ee   Jens Axboe   blk-mq: cache req...
299
  	rq->mq_hctx = data->hctx;
bf9ae8c53   Jens Axboe   blk-mq: fix bad c...
300
  	rq->rq_flags = rq_flags;
ef295ecf0   Christoph Hellwig   block: better op ...
301
  	rq->cmd_flags = op;
1b6d65a0b   Bart Van Assche   block: Introduce ...
302
303
  	if (data->flags & BLK_MQ_REQ_PREEMPT)
  		rq->rq_flags |= RQF_PREEMPT;
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
304
  	if (blk_queue_io_stat(data->q))
e80640213   Christoph Hellwig   block: split out ...
305
  		rq->rq_flags |= RQF_IO_STAT;
7c3fb70f0   Jens Axboe   block: rearrange ...
306
  	INIT_LIST_HEAD(&rq->queuelist);
af76e555e   Christoph Hellwig   blk-mq: initializ...
307
308
  	INIT_HLIST_NODE(&rq->hash);
  	RB_CLEAR_NODE(&rq->rb_node);
af76e555e   Christoph Hellwig   blk-mq: initializ...
309
310
  	rq->rq_disk = NULL;
  	rq->part = NULL;
6f816b4b7   Tejun Heo   blk-mq: add optio...
311
312
313
  #ifdef CONFIG_BLK_RQ_ALLOC_TIME
  	rq->alloc_time_ns = alloc_time_ns;
  #endif
fe1f45264   Jens Axboe   blk-mq: don't cal...
314
315
316
317
  	if (blk_mq_need_time_stamp(rq))
  		rq->start_time_ns = ktime_get_ns();
  	else
  		rq->start_time_ns = 0;
544ccc8dc   Omar Sandoval   block: get rid of...
318
  	rq->io_start_time_ns = 0;
3d2443069   Hou Tao   block: make rq se...
319
  	rq->stats_sectors = 0;
af76e555e   Christoph Hellwig   blk-mq: initializ...
320
321
322
323
  	rq->nr_phys_segments = 0;
  #if defined(CONFIG_BLK_DEV_INTEGRITY)
  	rq->nr_integrity_segments = 0;
  #endif
af76e555e   Christoph Hellwig   blk-mq: initializ...
324
  	/* tag was already set */
af76e555e   Christoph Hellwig   blk-mq: initializ...
325
  	rq->extra_len = 0;
079076b34   Christoph Hellwig   block: remove dea...
326
  	WRITE_ONCE(rq->deadline, 0);
af76e555e   Christoph Hellwig   blk-mq: initializ...
327

f6be4fb4b   Jens Axboe   blk-mq: ->timeout...
328
  	rq->timeout = 0;
af76e555e   Christoph Hellwig   blk-mq: initializ...
329
330
  	rq->end_io = NULL;
  	rq->end_io_data = NULL;
af76e555e   Christoph Hellwig   blk-mq: initializ...
331

e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
332
  	data->ctx->rq_dispatched[op_is_sync(op)]++;
12f5b9314   Keith Busch   blk-mq: Remove ge...
333
  	refcount_set(&rq->ref, 1);
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
334
  	return rq;
5dee85772   Christoph Hellwig   blk-mq: initializ...
335
  }
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
336
  static struct request *blk_mq_get_request(struct request_queue *q,
f9afca4d3   Jens Axboe   blk-mq: pass in r...
337
338
  					  struct bio *bio,
  					  struct blk_mq_alloc_data *data)
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
339
340
341
  {
  	struct elevator_queue *e = q->elevator;
  	struct request *rq;
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
342
  	unsigned int tag;
c05f42206   Bart Van Assche   blk-mq: remove bl...
343
  	bool clear_ctx_on_error = false;
6f816b4b7   Tejun Heo   blk-mq: add optio...
344
  	u64 alloc_time_ns = 0;
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
345
346
  
  	blk_queue_enter_live(q);
6f816b4b7   Tejun Heo   blk-mq: add optio...
347
348
349
350
  
  	/* alloc_time includes depth and tag waits */
  	if (blk_queue_rq_alloc_time(q))
  		alloc_time_ns = ktime_get_ns();
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
351
  	data->q = q;
21e768b44   Bart Van Assche   blk-mq: Make blk_...
352
353
  	if (likely(!data->ctx)) {
  		data->ctx = blk_mq_get_ctx(q);
c05f42206   Bart Van Assche   blk-mq: remove bl...
354
  		clear_ctx_on_error = true;
21e768b44   Bart Van Assche   blk-mq: Make blk_...
355
  	}
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
356
  	if (likely(!data->hctx))
f9afca4d3   Jens Axboe   blk-mq: pass in r...
357
  		data->hctx = blk_mq_map_queue(q, data->cmd_flags,
8ccdf4a37   Jianchao Wang   blk-mq: save queu...
358
  						data->ctx);
f9afca4d3   Jens Axboe   blk-mq: pass in r...
359
  	if (data->cmd_flags & REQ_NOWAIT)
03a07c92a   Goldwyn Rodrigues   block: return on ...
360
  		data->flags |= BLK_MQ_REQ_NOWAIT;
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
361
362
363
364
365
366
  
  	if (e) {
  		data->flags |= BLK_MQ_REQ_INTERNAL;
  
  		/*
  		 * Flush requests are special and go directly to the
17a511993   Jens Axboe   blk-mq: don't cal...
367
368
  		 * dispatch list. Don't include reserved tags in the
  		 * limiting, as it isn't useful.
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
369
  		 */
f9afca4d3   Jens Axboe   blk-mq: pass in r...
370
371
  		if (!op_is_flush(data->cmd_flags) &&
  		    e->type->ops.limit_depth &&
17a511993   Jens Axboe   blk-mq: don't cal...
372
  		    !(data->flags & BLK_MQ_REQ_RESERVED))
f9afca4d3   Jens Axboe   blk-mq: pass in r...
373
  			e->type->ops.limit_depth(data->cmd_flags, data);
d263ed992   Jianchao Wang   blk-mq: count the...
374
375
  	} else {
  		blk_mq_tag_busy(data->hctx);
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
376
  	}
e4cdf1a1c   Christoph Hellwig   blk-mq: remove __...
377
378
  	tag = blk_mq_get_tag(data);
  	if (tag == BLK_MQ_TAG_FAIL) {
c05f42206   Bart Van Assche   blk-mq: remove bl...
379
  		if (clear_ctx_on_error)
1ad43c007   Ming Lei   blk-mq: don't lea...
380
  			data->ctx = NULL;
037cebb85   Christoph Hellwig   blk-mq: streamlin...
381
382
  		blk_queue_exit(q);
  		return NULL;
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
383
  	}
6f816b4b7   Tejun Heo   blk-mq: add optio...
384
  	rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
f9afca4d3   Jens Axboe   blk-mq: pass in r...
385
  	if (!op_is_flush(data->cmd_flags)) {
037cebb85   Christoph Hellwig   blk-mq: streamlin...
386
  		rq->elv.icq = NULL;
f9cd4bfe9   Jens Axboe   block: get rid of...
387
  		if (e && e->type->ops.prepare_request) {
e2b3fa5af   Damien Le Moal   block: Remove bio...
388
389
  			if (e->type->icq_cache)
  				blk_mq_sched_assign_ioc(rq);
44e8c2bff   Christoph Hellwig   blk-mq: refactor ...
390

f9cd4bfe9   Jens Axboe   block: get rid of...
391
  			e->type->ops.prepare_request(rq, bio);
5bbf4e5a8   Christoph Hellwig   blk-mq-sched: uni...
392
  			rq->rq_flags |= RQF_ELVPRIV;
44e8c2bff   Christoph Hellwig   blk-mq: refactor ...
393
  		}
037cebb85   Christoph Hellwig   blk-mq: streamlin...
394
395
396
  	}
  	data->hctx->queued++;
  	return rq;
d2c0d3832   Christoph Hellwig   blk-mq: move blk_...
397
  }
cd6ce1482   Bart Van Assche   block: Make reque...
398
  struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
9a95e4ef7   Bart Van Assche   block, nvme: Intr...
399
  		blk_mq_req_flags_t flags)
320ae51fe   Jens Axboe   blk-mq: new multi...
400
  {
f9afca4d3   Jens Axboe   blk-mq: pass in r...
401
  	struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
bd166ef18   Jens Axboe   blk-mq-sched: add...
402
  	struct request *rq;
a492f0754   Joe Lawrence   block,scsi: fixup...
403
  	int ret;
320ae51fe   Jens Axboe   blk-mq: new multi...
404

3a0a52997   Bart Van Assche   block, scsi: Make...
405
  	ret = blk_queue_enter(q, flags);
a492f0754   Joe Lawrence   block,scsi: fixup...
406
407
  	if (ret)
  		return ERR_PTR(ret);
320ae51fe   Jens Axboe   blk-mq: new multi...
408

f9afca4d3   Jens Axboe   blk-mq: pass in r...
409
  	rq = blk_mq_get_request(q, NULL, &alloc_data);
3280d66a6   Keith Busch   blk-mq: Fix queue...
410
  	blk_queue_exit(q);
841bac2c8   Jens Axboe   blk-mq: get rid o...
411

bd166ef18   Jens Axboe   blk-mq-sched: add...
412
  	if (!rq)
a492f0754   Joe Lawrence   block,scsi: fixup...
413
  		return ERR_PTR(-EWOULDBLOCK);
0c4de0f33   Christoph Hellwig   block: ensure bio...
414
415
416
417
  
  	rq->__data_len = 0;
  	rq->__sector = (sector_t) -1;
  	rq->bio = rq->biotail = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
418
419
  	return rq;
  }
4bb659b15   Jens Axboe   blk-mq: implement...
420
  EXPORT_SYMBOL(blk_mq_alloc_request);
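
/*
 * Illustrative sketch, not part of blk-mq.c: how a caller typically uses
 * blk_mq_alloc_request() for an internal/passthrough command.  The use of
 * REQ_OP_DRV_IN and the demo_fill_cmd() helper are illustrative only.
 */
static int demo_issue_internal_cmd(struct request_queue *q)
{
	struct request *rq;

	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);	/* e.g. -EWOULDBLOCK when no tag is free */

	demo_fill_cmd(rq);		/* hypothetical: set up the driver payload */
	blk_execute_rq(q, NULL, rq, false);
	blk_mq_free_request(rq);
	return 0;
}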
320ae51fe   Jens Axboe   blk-mq: new multi...
421

cd6ce1482   Bart Van Assche   block: Make reque...
422
  struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
9a95e4ef7   Bart Van Assche   block, nvme: Intr...
423
  	unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx)
1f5bd336b   Ming Lin   blk-mq: add blk_m...
424
  {
f9afca4d3   Jens Axboe   blk-mq: pass in r...
425
  	struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op };
1f5bd336b   Ming Lin   blk-mq: add blk_m...
426
  	struct request *rq;
6d2809d51   Omar Sandoval   blk-mq: make blk_...
427
  	unsigned int cpu;
1f5bd336b   Ming Lin   blk-mq: add blk_m...
428
429
430
431
432
433
434
435
436
437
438
439
440
  	int ret;
  
  	/*
  	 * If the tag allocator sleeps we could get an allocation for a
  	 * different hardware context.  No need to complicate the low level
  	 * allocator for this for the rare use case of a command tied to
  	 * a specific queue.
  	 */
  	if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
  		return ERR_PTR(-EINVAL);
  
  	if (hctx_idx >= q->nr_hw_queues)
  		return ERR_PTR(-EIO);
3a0a52997   Bart Van Assche   block, scsi: Make...
441
  	ret = blk_queue_enter(q, flags);
1f5bd336b   Ming Lin   blk-mq: add blk_m...
442
443
  	if (ret)
  		return ERR_PTR(ret);
c8712c6a6   Christoph Hellwig   blk-mq: skip unma...
444
445
446
447
  	/*
  	 * Check if the hardware context is actually mapped to anything.
	 * If not, tell the caller that it should skip this queue.
  	 */
6d2809d51   Omar Sandoval   blk-mq: make blk_...
448
449
450
451
  	alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
  	if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
  		blk_queue_exit(q);
  		return ERR_PTR(-EXDEV);
c8712c6a6   Christoph Hellwig   blk-mq: skip unma...
452
  	}
20e4d8139   Christoph Hellwig   blk-mq: simplify ...
453
  	cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
6d2809d51   Omar Sandoval   blk-mq: make blk_...
454
  	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
1f5bd336b   Ming Lin   blk-mq: add blk_m...
455

f9afca4d3   Jens Axboe   blk-mq: pass in r...
456
  	rq = blk_mq_get_request(q, NULL, &alloc_data);
3280d66a6   Keith Busch   blk-mq: Fix queue...
457
  	blk_queue_exit(q);
c8712c6a6   Christoph Hellwig   blk-mq: skip unma...
458

6d2809d51   Omar Sandoval   blk-mq: make blk_...
459
460
461
462
  	if (!rq)
  		return ERR_PTR(-EWOULDBLOCK);
  
  	return rq;
1f5bd336b   Ming Lin   blk-mq: add blk_m...
463
464
  }
  EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
12f5b9314   Keith Busch   blk-mq: Remove ge...
465
466
467
468
  static void __blk_mq_free_request(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
ea4f995ee   Jens Axboe   blk-mq: cache req...
469
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
12f5b9314   Keith Busch   blk-mq: Remove ge...
470
  	const int sched_tag = rq->internal_tag;
986d413b7   Bart Van Assche   blk-mq: Enable su...
471
  	blk_pm_mark_last_busy(rq);
ea4f995ee   Jens Axboe   blk-mq: cache req...
472
  	rq->mq_hctx = NULL;
12f5b9314   Keith Busch   blk-mq: Remove ge...
473
474
475
476
477
478
479
  	if (rq->tag != -1)
  		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
  	if (sched_tag != -1)
  		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
  	blk_mq_sched_restart(hctx);
  	blk_queue_exit(q);
  }
6af54051a   Christoph Hellwig   blk-mq: simplify ...
480
  void blk_mq_free_request(struct request *rq)
320ae51fe   Jens Axboe   blk-mq: new multi...
481
  {
320ae51fe   Jens Axboe   blk-mq: new multi...
482
  	struct request_queue *q = rq->q;
6af54051a   Christoph Hellwig   blk-mq: simplify ...
483
484
  	struct elevator_queue *e = q->elevator;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
ea4f995ee   Jens Axboe   blk-mq: cache req...
485
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
6af54051a   Christoph Hellwig   blk-mq: simplify ...
486

5bbf4e5a8   Christoph Hellwig   blk-mq-sched: uni...
487
  	if (rq->rq_flags & RQF_ELVPRIV) {
f9cd4bfe9   Jens Axboe   block: get rid of...
488
489
  		if (e && e->type->ops.finish_request)
  			e->type->ops.finish_request(rq);
6af54051a   Christoph Hellwig   blk-mq: simplify ...
490
491
492
493
494
  		if (rq->elv.icq) {
  			put_io_context(rq->elv.icq->ioc);
  			rq->elv.icq = NULL;
  		}
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
495

6af54051a   Christoph Hellwig   blk-mq: simplify ...
496
  	ctx->rq_completed[rq_is_sync(rq)]++;
e80640213   Christoph Hellwig   block: split out ...
497
  	if (rq->rq_flags & RQF_MQ_INFLIGHT)
0d2602ca3   Jens Axboe   blk-mq: improve s...
498
  		atomic_dec(&hctx->nr_active);
87760e5ee   Jens Axboe   block: hook up wr...
499

7beb2f845   Jens Axboe   blk-mq: wire up c...
500
501
  	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
  		laptop_io_completion(q->backing_dev_info);
a79050434   Josef Bacik   blk-rq-qos: refac...
502
  	rq_qos_done(q, rq);
0d2602ca3   Jens Axboe   blk-mq: improve s...
503

12f5b9314   Keith Busch   blk-mq: Remove ge...
504
505
506
  	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
  	if (refcount_dec_and_test(&rq->ref))
  		__blk_mq_free_request(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
507
  }
1a3b595a2   Jens Axboe   blk-mq: export bl...
508
  EXPORT_SYMBOL_GPL(blk_mq_free_request);
320ae51fe   Jens Axboe   blk-mq: new multi...
509

2a842acab   Christoph Hellwig   block: introduce ...
510
  inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
320ae51fe   Jens Axboe   blk-mq: new multi...
511
  {
fe1f45264   Jens Axboe   blk-mq: don't cal...
512
513
514
515
  	u64 now = 0;
  
  	if (blk_mq_need_time_stamp(rq))
  		now = ktime_get_ns();
522a77756   Omar Sandoval   block: consolidat...
516

4bc6339a5   Omar Sandoval   block: move blk_s...
517
518
  	if (rq->rq_flags & RQF_STATS) {
  		blk_mq_poll_stats_start(rq->q);
522a77756   Omar Sandoval   block: consolidat...
519
  		blk_stat_add(rq, now);
4bc6339a5   Omar Sandoval   block: move blk_s...
520
  	}
ed88660a5   Omar Sandoval   block: move call ...
521
522
  	if (rq->internal_tag != -1)
  		blk_mq_sched_completed_request(rq, now);
522a77756   Omar Sandoval   block: consolidat...
523
  	blk_account_io_done(rq, now);
0d11e6aca   Ming Lei   blk-mq: fix use-a...
524

91b63639c   Christoph Hellwig   blk-mq: bidi support
525
  	if (rq->end_io) {
a79050434   Josef Bacik   blk-rq-qos: refac...
526
  		rq_qos_done(rq->q, rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
527
  		rq->end_io(rq, error);
91b63639c   Christoph Hellwig   blk-mq: bidi support
528
  	} else {
320ae51fe   Jens Axboe   blk-mq: new multi...
529
  		blk_mq_free_request(rq);
91b63639c   Christoph Hellwig   blk-mq: bidi support
530
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
531
  }
c8a446ad6   Christoph Hellwig   blk-mq: rename bl...
532
  EXPORT_SYMBOL(__blk_mq_end_request);
63151a449   Christoph Hellwig   blk-mq: allow dri...
533

2a842acab   Christoph Hellwig   block: introduce ...
534
  void blk_mq_end_request(struct request *rq, blk_status_t error)
63151a449   Christoph Hellwig   blk-mq: allow dri...
535
536
537
  {
  	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
  		BUG();
c8a446ad6   Christoph Hellwig   blk-mq: rename bl...
538
  	__blk_mq_end_request(rq, error);
63151a449   Christoph Hellwig   blk-mq: allow dri...
539
  }
c8a446ad6   Christoph Hellwig   blk-mq: rename bl...
540
  EXPORT_SYMBOL(blk_mq_end_request);
320ae51fe   Jens Axboe   blk-mq: new multi...
541

30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
542
  static void __blk_mq_complete_request_remote(void *data)
320ae51fe   Jens Axboe   blk-mq: new multi...
543
  {
3d6efbf62   Christoph Hellwig   blk-mq: use __smp...
544
  	struct request *rq = data;
c7bb9ad17   Jens Axboe   block: get rid of...
545
  	struct request_queue *q = rq->q;
320ae51fe   Jens Axboe   blk-mq: new multi...
546

c7bb9ad17   Jens Axboe   block: get rid of...
547
  	q->mq_ops->complete(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
548
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
549

453f83418   Christoph Hellwig   blk-mq: simplify ...
550
  static void __blk_mq_complete_request(struct request *rq)
320ae51fe   Jens Axboe   blk-mq: new multi...
551
552
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
c7bb9ad17   Jens Axboe   block: get rid of...
553
  	struct request_queue *q = rq->q;
385352016   Christoph Hellwig   blk-mq: respect r...
554
  	bool shared = false;
320ae51fe   Jens Axboe   blk-mq: new multi...
555
  	int cpu;
af78ff7c6   Keith Busch   blk-mq: Simplify ...
556
  	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
36e765392   Ming Lei   blk-mq: complete ...
557
558
559
560
561
562
563
564
565
	/*
	 * For most single-queue controllers there is only one irq vector
	 * for handling I/O completion, and that irq's affinity is set to
	 * all possible CPUs. On most architectures, this affinity means
	 * the irq is handled on one specific CPU.
	 *
	 * So complete the I/O request in softirq context in the single
	 * queue case, to avoid degrading I/O performance through irqs-off
	 * latency.
	 */
c7bb9ad17   Jens Axboe   block: get rid of...
566
  	if (q->nr_hw_queues == 1) {
36e765392   Ming Lei   blk-mq: complete ...
567
568
569
  		__blk_complete_request(rq);
  		return;
  	}
4ab32bf33   Jens Axboe   blk-mq: never red...
570
571
572
573
574
575
	/*
	 * For a polled request, always complete locally; it's pointless
	 * to redirect the completion.
	 */
  	if ((rq->cmd_flags & REQ_HIPRI) ||
  	    !test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) {
c7bb9ad17   Jens Axboe   block: get rid of...
576
  		q->mq_ops->complete(rq);
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
577
578
  		return;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
579
580
  
  	cpu = get_cpu();
c7bb9ad17   Jens Axboe   block: get rid of...
581
  	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags))
385352016   Christoph Hellwig   blk-mq: respect r...
582
583
584
  		shared = cpus_share_cache(cpu, ctx->cpu);
  
  	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
585
  		rq->csd.func = __blk_mq_complete_request_remote;
3d6efbf62   Christoph Hellwig   blk-mq: use __smp...
586
587
  		rq->csd.info = rq;
  		rq->csd.flags = 0;
c46fff2a3   Frederic Weisbecker   smp: Rename __smp...
588
  		smp_call_function_single_async(ctx->cpu, &rq->csd);
3d6efbf62   Christoph Hellwig   blk-mq: use __smp...
589
  	} else {
c7bb9ad17   Jens Axboe   block: get rid of...
590
  		q->mq_ops->complete(rq);
3d6efbf62   Christoph Hellwig   blk-mq: use __smp...
591
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
592
593
  	put_cpu();
  }
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
594

04ced159c   Jens Axboe   blk-mq: move hctx...
595
  static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx)
b7435db8b   Bart Van Assche   blk-mq: Add locki...
596
  	__releases(hctx->srcu)
04ced159c   Jens Axboe   blk-mq: move hctx...
597
598
599
600
  {
  	if (!(hctx->flags & BLK_MQ_F_BLOCKING))
  		rcu_read_unlock();
  	else
05707b64a   Tejun Heo   blk-mq: rename bl...
601
  		srcu_read_unlock(hctx->srcu, srcu_idx);
04ced159c   Jens Axboe   blk-mq: move hctx...
602
603
604
  }
  
  static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
b7435db8b   Bart Van Assche   blk-mq: Add locki...
605
  	__acquires(hctx->srcu)
04ced159c   Jens Axboe   blk-mq: move hctx...
606
  {
08b5a6e2a   Jens Axboe   blk-mq: silence f...
607
608
609
  	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
  		/* shut up gcc false positive */
  		*srcu_idx = 0;
04ced159c   Jens Axboe   blk-mq: move hctx...
610
  		rcu_read_lock();
08b5a6e2a   Jens Axboe   blk-mq: silence f...
611
  	} else
05707b64a   Tejun Heo   blk-mq: rename bl...
612
  		*srcu_idx = srcu_read_lock(hctx->srcu);
04ced159c   Jens Axboe   blk-mq: move hctx...
613
  }
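
/*
 * Illustrative sketch, not part of blk-mq.c: hctx_lock()/hctx_unlock()
 * always bracket a dispatch section, transparently picking RCU or SRCU
 * depending on BLK_MQ_F_BLOCKING.  demo_locked_dispatch() is hypothetical.
 */
static void demo_locked_dispatch(struct blk_mq_hw_ctx *hctx)
{
	int srcu_idx;

	hctx_lock(hctx, &srcu_idx);
	/* ... dispatch work that a concurrent quiesce must be able to wait for ... */
	hctx_unlock(hctx, srcu_idx);
}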
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
614
615
616
617
618
619
620
621
  /**
   * blk_mq_complete_request - end I/O on a request
   * @rq:		the request being processed
   *
   * Description:
   *	Ends all I/O on a request. It does not handle partial completions.
   *	The actual completion happens out-of-order, through a IPI handler.
   **/
16c15eb16   Keith Busch   blk-mq: Return tr...
622
  bool blk_mq_complete_request(struct request *rq)
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
623
  {
12f5b9314   Keith Busch   blk-mq: Remove ge...
624
  	if (unlikely(blk_should_fake_timeout(rq->q)))
16c15eb16   Keith Busch   blk-mq: Return tr...
625
  		return false;
12f5b9314   Keith Busch   blk-mq: Remove ge...
626
  	__blk_mq_complete_request(rq);
16c15eb16   Keith Busch   blk-mq: Return tr...
627
  	return true;
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
628
629
  }
  EXPORT_SYMBOL(blk_mq_complete_request);
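
/*
 * Illustrative sketch, not part of blk-mq.c: the two-stage completion a
 * driver typically builds around this API.  demo_irq_handler() and
 * demo_complete_rq() are hypothetical.
 */
static void demo_irq_handler(struct request *rq)
{
	/* hard-irq context: just hand the request back to blk-mq */
	blk_mq_complete_request(rq);
}

static void demo_complete_rq(struct request *rq)
{
	/* ->complete() callback, possibly run via IPI/softirq on rq's home CPU */
	blk_mq_end_request(rq, BLK_STS_OK);
}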
320ae51fe   Jens Axboe   blk-mq: new multi...
630

973c01919   Keith Busch   blk-mq: Export if...
631
632
  int blk_mq_request_started(struct request *rq)
  {
5a61c3639   Tejun Heo   blk-mq: remove RE...
633
  	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
973c01919   Keith Busch   blk-mq: Export if...
634
635
  }
  EXPORT_SYMBOL_GPL(blk_mq_request_started);
aa306ab70   Ming Lei   blk-mq: introduce...
636
637
638
639
640
  int blk_mq_request_completed(struct request *rq)
  {
  	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
  }
  EXPORT_SYMBOL_GPL(blk_mq_request_completed);
e2490073c   Christoph Hellwig   blk-mq: call blk_...
641
  void blk_mq_start_request(struct request *rq)
320ae51fe   Jens Axboe   blk-mq: new multi...
642
643
644
645
  {
  	struct request_queue *q = rq->q;
  
  	trace_block_rq_issue(q, rq);
cf43e6be8   Jens Axboe   block: add scalab...
646
  	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
544ccc8dc   Omar Sandoval   block: get rid of...
647
  		rq->io_start_time_ns = ktime_get_ns();
3d2443069   Hou Tao   block: make rq se...
648
  		rq->stats_sectors = blk_rq_sectors(rq);
cf43e6be8   Jens Axboe   block: add scalab...
649
  		rq->rq_flags |= RQF_STATS;
a79050434   Josef Bacik   blk-rq-qos: refac...
650
  		rq_qos_issue(q, rq);
cf43e6be8   Jens Axboe   block: add scalab...
651
  	}
1d9bd5161   Tejun Heo   blk-mq: replace t...
652
  	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
538b75341   Jens Axboe   blk-mq: request d...
653

1d9bd5161   Tejun Heo   blk-mq: replace t...
654
  	blk_add_timer(rq);
12f5b9314   Keith Busch   blk-mq: Remove ge...
655
  	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
49f5baa51   Christoph Hellwig   blk-mq: pair blk_...
656
657
658
659
660
661
662
663
664
  
  	if (q->dma_drain_size && blk_rq_bytes(rq)) {
  		/*
  		 * Make sure space for the drain appears.  We know we can do
  		 * this because max_hw_segments has been adjusted to be one
  		 * fewer than the device can handle.
  		 */
  		rq->nr_phys_segments++;
  	}
54d4e6ab9   Max Gurtovoy   block: centralize...
665
666
667
668
669
  
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
  		q->integrity.profile->prepare_fn(rq);
  #endif
320ae51fe   Jens Axboe   blk-mq: new multi...
670
  }
e2490073c   Christoph Hellwig   blk-mq: call blk_...
671
  EXPORT_SYMBOL(blk_mq_start_request);
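
/*
 * Illustrative sketch, not part of blk-mq.c: where blk_mq_start_request()
 * sits in a minimal ->queue_rq() implementation.  demo_hw_submit() is a
 * hypothetical hardware submission helper.
 */
static blk_status_t demo_queue_rq(struct blk_mq_hw_ctx *hctx,
				  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);	/* arms the timeout, see above */

	if (!demo_hw_submit(rq))	/* hypothetical: ring the doorbell */
		return BLK_STS_RESOURCE;	/* blk-mq will retry the dispatch */

	return BLK_STS_OK;
}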
320ae51fe   Jens Axboe   blk-mq: new multi...
672

ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
673
  static void __blk_mq_requeue_request(struct request *rq)
320ae51fe   Jens Axboe   blk-mq: new multi...
674
675
  {
  	struct request_queue *q = rq->q;
923218f61   Ming Lei   blk-mq: don't all...
676
  	blk_mq_put_driver_tag(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
677
  	trace_block_rq_requeue(q, rq);
a79050434   Josef Bacik   blk-rq-qos: refac...
678
  	rq_qos_requeue(q, rq);
49f5baa51   Christoph Hellwig   blk-mq: pair blk_...
679

12f5b9314   Keith Busch   blk-mq: Remove ge...
680
681
  	if (blk_mq_request_started(rq)) {
  		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
da6612673   Christoph Hellwig   blk-mq: don't tim...
682
  		rq->rq_flags &= ~RQF_TIMED_OUT;
e2490073c   Christoph Hellwig   blk-mq: call blk_...
683
684
685
  		if (q->dma_drain_size && blk_rq_bytes(rq))
  			rq->nr_phys_segments--;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
686
  }
2b053aca7   Bart Van Assche   blk-mq: Add a kic...
687
  void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
688
  {
ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
689
  	__blk_mq_requeue_request(rq);
ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
690

105976f51   Ming Lei   blk-mq: don't cal...
691
692
  	/* this request will be re-inserted to io scheduler queue */
  	blk_mq_sched_requeue_request(rq);
7d692330e   Jens Axboe   block: get rid of...
693
  	BUG_ON(!list_empty(&rq->queuelist));
2b053aca7   Bart Van Assche   blk-mq: Add a kic...
694
  	blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
695
696
  }
  EXPORT_SYMBOL(blk_mq_requeue_request);
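
/*
 * Illustrative sketch, not part of blk-mq.c: a driver error path that
 * sends a request back for a retry instead of failing it outright.
 * demo_status_is_retryable() is hypothetical.
 */
static void demo_complete_or_retry(struct request *rq, blk_status_t sts)
{
	if (demo_status_is_retryable(sts)) {
		blk_mq_requeue_request(rq, true);	/* also kicks the requeue list */
		return;
	}
	blk_mq_end_request(rq, sts);
}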
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
697
698
699
  static void blk_mq_requeue_work(struct work_struct *work)
  {
  	struct request_queue *q =
2849450ad   Mike Snitzer   blk-mq: introduce...
700
  		container_of(work, struct request_queue, requeue_work.work);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
701
702
  	LIST_HEAD(rq_list);
  	struct request *rq, *next;
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
703

18e9781d4   Jens Axboe   blk-mq: blk_mq_re...
704
  	spin_lock_irq(&q->requeue_lock);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
705
  	list_splice_init(&q->requeue_list, &rq_list);
18e9781d4   Jens Axboe   blk-mq: blk_mq_re...
706
  	spin_unlock_irq(&q->requeue_lock);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
707
708
  
  	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
aef1897cd   Jianchao Wang   blk-mq: insert rq...
709
  		if (!(rq->rq_flags & (RQF_SOFTBARRIER | RQF_DONTPREP)))
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
710
  			continue;
e80640213   Christoph Hellwig   block: split out ...
711
  		rq->rq_flags &= ~RQF_SOFTBARRIER;
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
712
  		list_del_init(&rq->queuelist);
aef1897cd   Jianchao Wang   blk-mq: insert rq...
713
714
715
716
717
718
  		/*
		 * If RQF_DONTPREP is set, the rq already contains driver-specific
		 * data, so insert it into the hctx dispatch list to avoid any
		 * merge.
  		 */
  		if (rq->rq_flags & RQF_DONTPREP)
74c77d6a4   Ming Lei   blk-mq: insert pa...
719
  			blk_mq_request_bypass_insert(rq, false, false);
aef1897cd   Jianchao Wang   blk-mq: insert rq...
720
721
  		else
  			blk_mq_sched_insert_request(rq, true, false, false);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
722
723
724
725
726
  	}
  
  	while (!list_empty(&rq_list)) {
  		rq = list_entry(rq_list.next, struct request, queuelist);
  		list_del_init(&rq->queuelist);
9e97d2951   Mike Snitzer   blk-mq-sched: rem...
727
  		blk_mq_sched_insert_request(rq, false, false, false);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
728
  	}
52d7f1b5c   Bart Van Assche   blk-mq: Avoid tha...
729
  	blk_mq_run_hw_queues(q, false);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
730
  }
2b053aca7   Bart Van Assche   blk-mq: Add a kic...
731
732
  void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
  				bool kick_requeue_list)
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
733
734
735
736
737
738
  {
  	struct request_queue *q = rq->q;
  	unsigned long flags;
  
  	/*
  	 * We abuse this flag that is otherwise used by the I/O scheduler to
ff821d271   Jens Axboe   blk-mq: fixup som...
739
  	 * request head insertion from the workqueue.
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
740
  	 */
e80640213   Christoph Hellwig   block: split out ...
741
  	BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
742
743
744
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	if (at_head) {
e80640213   Christoph Hellwig   block: split out ...
745
  		rq->rq_flags |= RQF_SOFTBARRIER;
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
746
747
748
749
750
  		list_add(&rq->queuelist, &q->requeue_list);
  	} else {
  		list_add_tail(&rq->queuelist, &q->requeue_list);
  	}
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
2b053aca7   Bart Van Assche   blk-mq: Add a kic...
751
752
753
  
  	if (kick_requeue_list)
  		blk_mq_kick_requeue_list(q);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
754
  }
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
755
756
757
  
  void blk_mq_kick_requeue_list(struct request_queue *q)
  {
ae943d206   Bart Van Assche   blk-mq: Avoid tha...
758
  	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
759
760
  }
  EXPORT_SYMBOL(blk_mq_kick_requeue_list);
2849450ad   Mike Snitzer   blk-mq: introduce...
761
762
763
  void blk_mq_delay_kick_requeue_list(struct request_queue *q,
  				    unsigned long msecs)
  {
d4acf3650   Bart Van Assche   block: Make blk_m...
764
765
  	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
  				    msecs_to_jiffies(msecs));
2849450ad   Mike Snitzer   blk-mq: introduce...
766
767
  }
  EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
0e62f51f8   Jens Axboe   blk-mq: let blk_m...
768
769
  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
  {
88c7b2b75   Jens Axboe   blk-mq: prefetch ...
770
771
  	if (tag < tags->nr_tags) {
  		prefetch(tags->rqs[tag]);
4ee86babe   Hannes Reinecke   blk-mq: add bound...
772
  		return tags->rqs[tag];
88c7b2b75   Jens Axboe   blk-mq: prefetch ...
773
  	}
4ee86babe   Hannes Reinecke   blk-mq: add bound...
774
775
  
  	return NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
776
777
  }
  EXPORT_SYMBOL(blk_mq_tag_to_rq);
3c94d83cb   Jens Axboe   blk-mq: change bl...
778
779
  static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
  			       void *priv, bool reserved)
ae8799125   Jens Axboe   blk-mq: provide a...
780
781
  {
  	/*
49a7ac29f   Ming Lei   blk-mq: consider ...
782
  	 * If we find a request that isn't idle and the queue matches,
3c94d83cb   Jens Axboe   blk-mq: change bl...
783
  	 * we know the queue is busy. Return false to stop the iteration.
ae8799125   Jens Axboe   blk-mq: provide a...
784
  	 */
49a7ac29f   Ming Lei   blk-mq: consider ...
785
  	if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
ae8799125   Jens Axboe   blk-mq: provide a...
786
787
788
789
790
791
792
793
  		bool *busy = priv;
  
  		*busy = true;
  		return false;
  	}
  
  	return true;
  }
3c94d83cb   Jens Axboe   blk-mq: change bl...
794
  bool blk_mq_queue_inflight(struct request_queue *q)
ae8799125   Jens Axboe   blk-mq: provide a...
795
796
  {
  	bool busy = false;
3c94d83cb   Jens Axboe   blk-mq: change bl...
797
  	blk_mq_queue_tag_busy_iter(q, blk_mq_rq_inflight, &busy);
ae8799125   Jens Axboe   blk-mq: provide a...
798
799
  	return busy;
  }
3c94d83cb   Jens Axboe   blk-mq: change bl...
800
  EXPORT_SYMBOL_GPL(blk_mq_queue_inflight);
ae8799125   Jens Axboe   blk-mq: provide a...
801

358f70da4   Tejun Heo   blk-mq: make blk_...
802
  static void blk_mq_rq_timed_out(struct request *req, bool reserved)
320ae51fe   Jens Axboe   blk-mq: new multi...
803
  {
da6612673   Christoph Hellwig   blk-mq: don't tim...
804
  	req->rq_flags |= RQF_TIMED_OUT;
d1210d5af   Christoph Hellwig   blk-mq: simplify ...
805
806
807
808
809
810
811
  	if (req->q->mq_ops->timeout) {
  		enum blk_eh_timer_return ret;
  
  		ret = req->q->mq_ops->timeout(req, reserved);
  		if (ret == BLK_EH_DONE)
  			return;
  		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
46f92d42e   Christoph Hellwig   blk-mq: unshared ...
812
  	}
d1210d5af   Christoph Hellwig   blk-mq: simplify ...
813
814
  
  	blk_add_timer(req);
87ee7b112   Jens Axboe   blk-mq: fix race ...
815
  }
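
/*
 * Illustrative sketch, not part of blk-mq.c: the shape of a driver
 * ->timeout() callback invoked from blk_mq_rq_timed_out() above.
 * demo_try_abort() is hypothetical.
 */
static enum blk_eh_timer_return demo_timeout(struct request *rq, bool reserved)
{
	if (demo_try_abort(rq))			/* hypothetical: recovery started */
		return BLK_EH_RESET_TIMER;	/* re-arm via blk_add_timer() */

	/* give up: record a driver-private error and complete it ourselves */
	blk_mq_complete_request(rq);
	return BLK_EH_DONE;
}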
5b3f25fc3   Keith Busch   blk-mq: Allow req...
816

12f5b9314   Keith Busch   blk-mq: Remove ge...
817
  static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
81481eb42   Christoph Hellwig   blk-mq: fix and s...
818
  {
12f5b9314   Keith Busch   blk-mq: Remove ge...
819
  	unsigned long deadline;
87ee7b112   Jens Axboe   blk-mq: fix race ...
820

12f5b9314   Keith Busch   blk-mq: Remove ge...
821
822
  	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
  		return false;
da6612673   Christoph Hellwig   blk-mq: don't tim...
823
824
  	if (rq->rq_flags & RQF_TIMED_OUT)
  		return false;
a7af0af32   Peter Zijlstra   blk-mq: attempt t...
825

079076b34   Christoph Hellwig   block: remove dea...
826
  	deadline = READ_ONCE(rq->deadline);
12f5b9314   Keith Busch   blk-mq: Remove ge...
827
828
  	if (time_after_eq(jiffies, deadline))
  		return true;
a7af0af32   Peter Zijlstra   blk-mq: attempt t...
829

12f5b9314   Keith Busch   blk-mq: Remove ge...
830
831
832
833
834
  	if (*next == 0)
  		*next = deadline;
  	else if (time_after(*next, deadline))
  		*next = deadline;
  	return false;
87ee7b112   Jens Axboe   blk-mq: fix race ...
835
  }
7baa85727   Jens Axboe   blk-mq-tag: chang...
836
  static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
1d9bd5161   Tejun Heo   blk-mq: replace t...
837
838
  		struct request *rq, void *priv, bool reserved)
  {
12f5b9314   Keith Busch   blk-mq: Remove ge...
839
840
841
842
843
844
845
  	unsigned long *next = priv;
  
  	/*
	 * Just do a quick check if it is expired before locking the request in,
	 * so we're not unnecessarily synchronizing across CPUs.
  	 */
  	if (!blk_mq_req_expired(rq, next))
7baa85727   Jens Axboe   blk-mq-tag: chang...
846
  		return true;
12f5b9314   Keith Busch   blk-mq: Remove ge...
847
848
849
850
851
852
853
854
855
856
857
  
  	/*
  	 * We have reason to believe the request may be expired. Take a
  	 * reference on the request to lock this request lifetime into its
  	 * currently allocated context to prevent it from being reallocated in
  	 * the event the completion by-passes this timeout handler.
  	 *
  	 * If the reference was already released, then the driver beat the
  	 * timeout handler to posting a natural completion.
  	 */
  	if (!refcount_inc_not_zero(&rq->ref))
7baa85727   Jens Axboe   blk-mq-tag: chang...
858
  		return true;
12f5b9314   Keith Busch   blk-mq: Remove ge...
859

1d9bd5161   Tejun Heo   blk-mq: replace t...
860
  	/*
12f5b9314   Keith Busch   blk-mq: Remove ge...
861
862
863
864
  	 * The request is now locked and cannot be reallocated underneath the
  	 * timeout handler's processing. Re-verify this exact request is truly
  	 * expired; if it is not expired, then the request was completed and
  	 * reallocated as a new request.
1d9bd5161   Tejun Heo   blk-mq: replace t...
865
  	 */
12f5b9314   Keith Busch   blk-mq: Remove ge...
866
  	if (blk_mq_req_expired(rq, next))
1d9bd5161   Tejun Heo   blk-mq: replace t...
867
  		blk_mq_rq_timed_out(rq, reserved);
8d6996630   Yufen Yu   block: fix null p...
868
869
870
871
  
  	if (is_flush_rq(rq, hctx))
  		rq->end_io(rq, 0);
  	else if (refcount_dec_and_test(&rq->ref))
12f5b9314   Keith Busch   blk-mq: Remove ge...
872
  		__blk_mq_free_request(rq);
7baa85727   Jens Axboe   blk-mq-tag: chang...
873
874
  
  	return true;
1d9bd5161   Tejun Heo   blk-mq: replace t...
875
  }
287922eb0   Christoph Hellwig   block: defer time...
876
  static void blk_mq_timeout_work(struct work_struct *work)
320ae51fe   Jens Axboe   blk-mq: new multi...
877
  {
287922eb0   Christoph Hellwig   block: defer time...
878
879
  	struct request_queue *q =
  		container_of(work, struct request_queue, timeout_work);
12f5b9314   Keith Busch   blk-mq: Remove ge...
880
  	unsigned long next = 0;
1d9bd5161   Tejun Heo   blk-mq: replace t...
881
  	struct blk_mq_hw_ctx *hctx;
81481eb42   Christoph Hellwig   blk-mq: fix and s...
882
  	int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
883

71f79fb31   Gabriel Krisman Bertazi   blk-mq: Allow tim...
884
885
886
887
888
889
890
891
892
  	/* A deadlock might occur if a request is stuck requiring a
	 * timeout at the same time a queue freeze is waiting for
	 * completion, since the timeout code would not be able to
  	 * acquire the queue reference here.
  	 *
  	 * That's why we don't use blk_queue_enter here; instead, we use
  	 * percpu_ref_tryget directly, because we need to be able to
  	 * obtain a reference even in the short window between the queue
  	 * starting to freeze, by dropping the first reference in
1671d522c   Ming Lei   block: rename blk...
893
  	 * blk_freeze_queue_start, and the moment the last request is
71f79fb31   Gabriel Krisman Bertazi   blk-mq: Allow tim...
894
895
896
897
  	 * consumed, marked by the instant q_usage_counter reaches
  	 * zero.
  	 */
  	if (!percpu_ref_tryget(&q->q_usage_counter))
287922eb0   Christoph Hellwig   block: defer time...
898
  		return;
12f5b9314   Keith Busch   blk-mq: Remove ge...
899
  	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
320ae51fe   Jens Axboe   blk-mq: new multi...
900

12f5b9314   Keith Busch   blk-mq: Remove ge...
901
902
  	if (next != 0) {
  		mod_timer(&q->timeout, next);
0d2602ca3   Jens Axboe   blk-mq: improve s...
903
  	} else {
fcd36c36f   Bart Van Assche   blk-mq: Explain w...
904
905
906
907
908
909
  		/*
  		 * Request timeouts are handled as a forward rolling timer. If
  		 * we end up here it means that no requests are pending and
  		 * also that no request has been pending for a while. Mark
  		 * each hctx as idle.
  		 */
f054b56c9   Ming Lei   blk-mq: fix race ...
910
911
912
913
914
  		queue_for_each_hw_ctx(q, hctx, i) {
  			/* the hctx may be unmapped, so check it here */
  			if (blk_mq_hw_queue_mapped(hctx))
  				blk_mq_tag_idle(hctx);
  		}
0d2602ca3   Jens Axboe   blk-mq: improve s...
915
  	}
287922eb0   Christoph Hellwig   block: defer time...
916
  	blk_queue_exit(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
917
  }
88459642c   Omar Sandoval   blk-mq: abstract ...
918
919
920
921
922
923
924
925
926
927
  struct flush_busy_ctx_data {
  	struct blk_mq_hw_ctx *hctx;
  	struct list_head *list;
  };
  
  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
  {
  	struct flush_busy_ctx_data *flush_data = data;
  	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
  	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
928
  	enum hctx_type type = hctx->type;
88459642c   Omar Sandoval   blk-mq: abstract ...
929

88459642c   Omar Sandoval   blk-mq: abstract ...
930
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
931
  	list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
e9a99a638   Omar Sandoval   block: clear ctx ...
932
  	sbitmap_clear_bit(sb, bitnr);
88459642c   Omar Sandoval   blk-mq: abstract ...
933
934
935
  	spin_unlock(&ctx->lock);
  	return true;
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
936
  /*
1429d7c94   Jens Axboe   blk-mq: switch ct...
937
938
939
   * Process software queues that have been marked busy, splicing them
 * to the for-dispatch list.
   */
2c3ad6679   Jens Axboe   blk-mq: export so...
940
  void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
1429d7c94   Jens Axboe   blk-mq: switch ct...
941
  {
88459642c   Omar Sandoval   blk-mq: abstract ...
942
943
944
945
  	struct flush_busy_ctx_data data = {
  		.hctx = hctx,
  		.list = list,
  	};
1429d7c94   Jens Axboe   blk-mq: switch ct...
946

88459642c   Omar Sandoval   blk-mq: abstract ...
947
  	sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
1429d7c94   Jens Axboe   blk-mq: switch ct...
948
  }
2c3ad6679   Jens Axboe   blk-mq: export so...
949
  EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
1429d7c94   Jens Axboe   blk-mq: switch ct...
950

b347689ff   Ming Lei   blk-mq-sched: imp...
951
952
953
954
955
956
957
958
959
960
961
  struct dispatch_rq_data {
  	struct blk_mq_hw_ctx *hctx;
  	struct request *rq;
  };
  
  static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
  		void *data)
  {
  	struct dispatch_rq_data *dispatch_data = data;
  	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
  	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
962
  	enum hctx_type type = hctx->type;
b347689ff   Ming Lei   blk-mq-sched: imp...
963
964
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
965
966
  	if (!list_empty(&ctx->rq_lists[type])) {
  		dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
b347689ff   Ming Lei   blk-mq-sched: imp...
967
  		list_del_init(&dispatch_data->rq->queuelist);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
968
  		if (list_empty(&ctx->rq_lists[type]))
b347689ff   Ming Lei   blk-mq-sched: imp...
969
970
971
972
973
974
975
976
977
978
  			sbitmap_clear_bit(sb, bitnr);
  	}
  	spin_unlock(&ctx->lock);
  
  	return !dispatch_data->rq;
  }
  
  struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
  					struct blk_mq_ctx *start)
  {
f31967f0e   Jens Axboe   blk-mq: allow sof...
979
  	unsigned off = start ? start->index_hw[hctx->type] : 0;
b347689ff   Ming Lei   blk-mq-sched: imp...
980
981
982
983
984
985
986
987
988
989
  	struct dispatch_rq_data data = {
  		.hctx = hctx,
  		.rq   = NULL,
  	};
  
  	__sbitmap_for_each_set(&hctx->ctx_map, off,
  			       dispatch_rq_from_ctx, &data);
  
  	return data.rq;
  }
703fd1c0f   Jens Axboe   blk-mq: account h...
990
991
992
993
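  /*
   * Illustrative bucket mapping (assuming BLK_MQ_MAX_DISPATCH_ORDER is 7,
   * as defined in blk-mq.h): queued == 1 -> 1, 2..3 -> 2, 4..7 -> 3,
   * 8..15 -> 4, and anything >= 32 saturates at the top bucket.
   */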
  static inline unsigned int queued_to_index(unsigned int queued)
  {
  	if (!queued)
  		return 0;
1429d7c94   Jens Axboe   blk-mq: switch ct...
994

703fd1c0f   Jens Axboe   blk-mq: account h...
995
  	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
1429d7c94   Jens Axboe   blk-mq: switch ct...
996
  }
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
997
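  /*
   * Try to assign a driver tag to @rq. On shared tag maps the request is
   * also marked RQF_MQ_INFLIGHT and hctx->nr_active is bumped (used for
   * tag-sharing accounting). Returns true if @rq now owns a driver tag.
   */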
  bool blk_mq_get_driver_tag(struct request *rq)
bd166ef18   Jens Axboe   blk-mq-sched: add...
998
999
1000
  {
  	struct blk_mq_alloc_data data = {
  		.q = rq->q,
ea4f995ee   Jens Axboe   blk-mq: cache req...
1001
  		.hctx = rq->mq_hctx,
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1002
  		.flags = BLK_MQ_REQ_NOWAIT,
f9afca4d3   Jens Axboe   blk-mq: pass in r...
1003
  		.cmd_flags = rq->cmd_flags,
bd166ef18   Jens Axboe   blk-mq-sched: add...
1004
  	};
d263ed992   Jianchao Wang   blk-mq: count the...
1005
  	bool shared;
5feeacdd4   Jens Axboe   blk-mq: add might...
1006

81380ca10   Omar Sandoval   blk-mq: use the r...
1007
1008
  	if (rq->tag != -1)
  		goto done;
bd166ef18   Jens Axboe   blk-mq-sched: add...
1009

415b806de   Sagi Grimberg   blk-mq-sched: All...
1010
1011
  	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
  		data.flags |= BLK_MQ_REQ_RESERVED;
d263ed992   Jianchao Wang   blk-mq: count the...
1012
  	shared = blk_mq_tag_busy(data.hctx);
bd166ef18   Jens Axboe   blk-mq-sched: add...
1013
1014
  	rq->tag = blk_mq_get_tag(&data);
  	if (rq->tag >= 0) {
d263ed992   Jianchao Wang   blk-mq: count the...
1015
  		if (shared) {
200e86b33   Jens Axboe   blk-mq: only appl...
1016
1017
1018
  			rq->rq_flags |= RQF_MQ_INFLIGHT;
  			atomic_inc(&data.hctx->nr_active);
  		}
bd166ef18   Jens Axboe   blk-mq-sched: add...
1019
  		data.hctx->tags->rqs[rq->tag] = rq;
bd166ef18   Jens Axboe   blk-mq-sched: add...
1020
  	}
81380ca10   Omar Sandoval   blk-mq: use the r...
1021
  done:
81380ca10   Omar Sandoval   blk-mq: use the r...
1022
  	return rq->tag != -1;
bd166ef18   Jens Axboe   blk-mq-sched: add...
1023
  }
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1024
1025
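  /*
   * Wait-queue callback, invoked when a tag is freed while this hctx is
   * parked on the sbitmap wait queue: detach the wait entry, drop the
   * ws_active reference and kick the hardware queue asynchronously.
   */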
  static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
  				int flags, void *key)
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1026
1027
1028
1029
  {
  	struct blk_mq_hw_ctx *hctx;
  
  	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
5815839b3   Ming Lei   blk-mq: introduce...
1030
  	spin_lock(&hctx->dispatch_wait_lock);
e86185754   Jens Axboe   blk-mq: fix sbitm...
1031
1032
1033
1034
1035
1036
1037
  	if (!list_empty(&wait->entry)) {
  		struct sbitmap_queue *sbq;
  
  		list_del_init(&wait->entry);
  		sbq = &hctx->tags->bitmap_tags;
  		atomic_dec(&sbq->ws_active);
  	}
5815839b3   Ming Lei   blk-mq: introduce...
1038
  	spin_unlock(&hctx->dispatch_wait_lock);
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1039
1040
1041
  	blk_mq_run_hw_queue(hctx, true);
  	return 1;
  }
f906a6a0f   Jens Axboe   blk-mq: improve t...
1042
1043
  /*
   * Mark us waiting for a tag. For shared tags, this involves hooking us into
ee3e4de52   Bart Van Assche   blk-mq: Fix spell...
1044
1045
   * the tag wakeups. For non-shared tags, we can simply mark ourselves as
   * needing a restart. In both cases, take care to check the condition again after
f906a6a0f   Jens Axboe   blk-mq: improve t...
1046
1047
   * marking us as waiting.
   */
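  /*
   * Lock ordering below: bt_wait_ptr()'s wait queue lock is taken first
   * (with interrupts disabled), then hctx->dispatch_wait_lock nests
   * inside it; blk_mq_dispatch_wake() only needs the inner lock.
   */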
2278d69f0   Ming Lei   blk-mq: don't pas...
1048
  static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
f906a6a0f   Jens Axboe   blk-mq: improve t...
1049
  				 struct request *rq)
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1050
  {
e86185754   Jens Axboe   blk-mq: fix sbitm...
1051
  	struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
5815839b3   Ming Lei   blk-mq: introduce...
1052
  	struct wait_queue_head *wq;
f906a6a0f   Jens Axboe   blk-mq: improve t...
1053
1054
  	wait_queue_entry_t *wait;
  	bool ret;
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1055

2278d69f0   Ming Lei   blk-mq: don't pas...
1056
  	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) {
684b73245   Yufen Yu   blk-mq: use blk_m...
1057
  		blk_mq_sched_mark_restart_hctx(hctx);
f906a6a0f   Jens Axboe   blk-mq: improve t...
1058

c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1059
1060
1061
1062
1063
1064
1065
1066
  		/*
  		 * It's possible that a tag was freed in the window between the
  		 * allocation failure and adding the hardware queue to the wait
  		 * queue.
  		 *
  		 * Don't clear RESTART here; someone else could have set it.
  		 * At most this will cost an extra queue run.
  		 */
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1067
  		return blk_mq_get_driver_tag(rq);
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1068
  	}
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1069

2278d69f0   Ming Lei   blk-mq: don't pas...
1070
  	wait = &hctx->dispatch_wait;
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1071
1072
  	if (!list_empty_careful(&wait->entry))
  		return false;
e86185754   Jens Axboe   blk-mq: fix sbitm...
1073
  	wq = &bt_wait_ptr(sbq, hctx)->wait;
5815839b3   Ming Lei   blk-mq: introduce...
1074
1075
1076
  
  	spin_lock_irq(&wq->lock);
  	spin_lock(&hctx->dispatch_wait_lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1077
  	if (!list_empty(&wait->entry)) {
5815839b3   Ming Lei   blk-mq: introduce...
1078
1079
  		spin_unlock(&hctx->dispatch_wait_lock);
  		spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1080
  		return false;
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1081
  	}
e86185754   Jens Axboe   blk-mq: fix sbitm...
1082
  	atomic_inc(&sbq->ws_active);
5815839b3   Ming Lei   blk-mq: introduce...
1083
1084
  	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
  	__add_wait_queue(wq, wait);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1085

da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1086
  	/*
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1087
1088
1089
  	 * It's possible that a tag was freed in the window between the
  	 * allocation failure and adding the hardware queue to the wait
  	 * queue.
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1090
  	 */
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1091
  	ret = blk_mq_get_driver_tag(rq);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1092
  	if (!ret) {
5815839b3   Ming Lei   blk-mq: introduce...
1093
1094
  		spin_unlock(&hctx->dispatch_wait_lock);
  		spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1095
  		return false;
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1096
  	}
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1097
1098
1099
1100
1101
  
  	/*
  	 * We got a tag, remove ourselves from the wait queue to ensure
  	 * someone else gets the wakeup.
  	 */
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1102
  	list_del_init(&wait->entry);
e86185754   Jens Axboe   blk-mq: fix sbitm...
1103
  	atomic_dec(&sbq->ws_active);
5815839b3   Ming Lei   blk-mq: introduce...
1104
1105
  	spin_unlock(&hctx->dispatch_wait_lock);
  	spin_unlock_irq(&wq->lock);
c27d53fb4   Bart Van Assche   blk-mq: Reduce th...
1106
1107
  
  	return true;
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1108
  }
6e7687173   Ming Lei   blk-mq: dequeue r...
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
  #define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
  #define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
  /*
   * Update dispatch_busy with an Exponential Weighted Moving Average (EWMA):
   * - EWMA is a simple way to maintain a running average
   * - the 7/8 and 1/8 weights make the value decay exponentially
   * - the factor of 4 keeps the result from collapsing to 0; its exact value
   *   doesn't matter much because the EWMA decays exponentially anyway
   */
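  /*
   * Worked example (illustrative only): with dispatch_busy == 2 and a busy
   * dispatch, the update is (2 * 7 + 16) / 8 == 3; on a non-busy dispatch
   * it is (2 * 7) / 8 == 1, so the value ramps up under pressure and
   * decays back toward 0 once dispatches succeed again.
   */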
  static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
  {
  	unsigned int ewma;
  
  	if (hctx->queue->elevator)
  		return;
  
  	ewma = hctx->dispatch_busy;
  
  	if (!ewma && !busy)
  		return;
  
  	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
  	if (busy)
  		ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
  	ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
  
  	hctx->dispatch_busy = ewma;
  }
86ff7c2a8   Ming Lei   blk-mq: introduce...
1137
  #define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
1f57f8d44   Jens Axboe   blk-mq: don't que...
1138
1139
1140
  /*
   * Returns true if we did some work AND can potentially do more.
   */
de1482974   Ming Lei   blk-mq: introduce...
1141
  bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1142
  			     bool got_budget)
320ae51fe   Jens Axboe   blk-mq: new multi...
1143
  {
81380ca10   Omar Sandoval   blk-mq: use the r...
1144
  	struct blk_mq_hw_ctx *hctx;
6d6f167ce   Jianchao Wang   blk-mq: put the d...
1145
  	struct request *rq, *nxt;
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1146
  	bool no_tag = false;
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1147
  	int errors, queued;
86ff7c2a8   Ming Lei   blk-mq: introduce...
1148
  	blk_status_t ret = BLK_STS_OK;
320ae51fe   Jens Axboe   blk-mq: new multi...
1149

81380ca10   Omar Sandoval   blk-mq: use the r...
1150
1151
  	if (list_empty(list))
  		return false;
de1482974   Ming Lei   blk-mq: introduce...
1152
  	WARN_ON(!list_is_singular(list) && got_budget);
320ae51fe   Jens Axboe   blk-mq: new multi...
1153
  	/*
320ae51fe   Jens Axboe   blk-mq: new multi...
1154
1155
  	 * Now process all the entries, sending them to the driver.
  	 */
93efe9817   Jens Axboe   blk-mq: include e...
1156
  	errors = queued = 0;
81380ca10   Omar Sandoval   blk-mq: use the r...
1157
  	do {
74c450521   Jens Axboe   blk-mq: add a 'li...
1158
  		struct blk_mq_queue_data bd;
320ae51fe   Jens Axboe   blk-mq: new multi...
1159

f04c3df3e   Jens Axboe   blk-mq: abstract ...
1160
  		rq = list_first_entry(list, struct request, queuelist);
0bca799b9   Ming Lei   blk-mq: order get...
1161

ea4f995ee   Jens Axboe   blk-mq: cache req...
1162
  		hctx = rq->mq_hctx;
c7b6c5129   John Garry   blk-mq: Put drive...
1163
1164
  		if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
  			blk_mq_put_driver_tag(rq);
0bca799b9   Ming Lei   blk-mq: order get...
1165
  			break;
c7b6c5129   John Garry   blk-mq: Put drive...
1166
  		}
0bca799b9   Ming Lei   blk-mq: order get...
1167

8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1168
  		if (!blk_mq_get_driver_tag(rq)) {
3c782d67c   Jens Axboe   blk-mq: fix poten...
1169
  			/*
da55f2cc7   Omar Sandoval   blk-mq: use sbq w...
1170
  			 * The initial allocation attempt failed, so we need to
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1171
1172
1173
1174
  			 * rerun the hardware queue when a tag is freed. The
  			 * waitqueue takes care of that. If the queue is run
  			 * before we add this entry back on the dispatch list,
  			 * we'll re-run it below.
3c782d67c   Jens Axboe   blk-mq: fix poten...
1175
  			 */
2278d69f0   Ming Lei   blk-mq: don't pas...
1176
  			if (!blk_mq_mark_tag_wait(hctx, rq)) {
0bca799b9   Ming Lei   blk-mq: order get...
1177
  				blk_mq_put_dispatch_budget(hctx);
f906a6a0f   Jens Axboe   blk-mq: improve t...
1178
1179
1180
1181
1182
1183
  				/*
  				 * For non-shared tags, the RESTART check
  				 * will suffice.
  				 */
  				if (hctx->flags & BLK_MQ_F_TAG_SHARED)
  					no_tag = true;
de1482974   Ming Lei   blk-mq: introduce...
1184
1185
1186
  				break;
  			}
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
1187
  		list_del_init(&rq->queuelist);
320ae51fe   Jens Axboe   blk-mq: new multi...
1188

74c450521   Jens Axboe   blk-mq: add a 'li...
1189
  		bd.rq = rq;
113285b47   Jens Axboe   blk-mq: ensure th...
1190
1191
1192
1193
1194
1195
1196
1197
  
  		/*
  		 * Flag last if we have no more requests, or if we have more
  		 * but can't assign a driver tag to it.
  		 */
  		if (list_empty(list))
  			bd.last = true;
  		else {
113285b47   Jens Axboe   blk-mq: ensure th...
1198
  			nxt = list_first_entry(list, struct request, queuelist);
8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1199
  			bd.last = !blk_mq_get_driver_tag(nxt);
113285b47   Jens Axboe   blk-mq: ensure th...
1200
  		}
74c450521   Jens Axboe   blk-mq: add a 'li...
1201
1202
  
  		ret = q->mq_ops->queue_rq(hctx, &bd);
86ff7c2a8   Ming Lei   blk-mq: introduce...
1203
  		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
6d6f167ce   Jianchao Wang   blk-mq: put the d...
1204
1205
  			/*
  			 * If an I/O scheduler has been configured and we got a
ff821d271   Jens Axboe   blk-mq: fixup som...
1206
1207
  			 * driver tag for the next request already, free it
  			 * again.
6d6f167ce   Jianchao Wang   blk-mq: put the d...
1208
1209
1210
1211
1212
  			 */
  			if (!list_empty(list)) {
  				nxt = list_first_entry(list, struct request, queuelist);
  				blk_mq_put_driver_tag(nxt);
  			}
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1213
  			list_add(&rq->queuelist, list);
ed0791b2f   Christoph Hellwig   blk-mq: add blk_m...
1214
  			__blk_mq_requeue_request(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
1215
  			break;
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1216
1217
1218
  		}
  
  		if (unlikely(ret != BLK_STS_OK)) {
93efe9817   Jens Axboe   blk-mq: include e...
1219
  			errors++;
2a842acab   Christoph Hellwig   block: introduce ...
1220
  			blk_mq_end_request(rq, BLK_STS_IOERR);
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1221
  			continue;
320ae51fe   Jens Axboe   blk-mq: new multi...
1222
  		}
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1223
  		queued++;
81380ca10   Omar Sandoval   blk-mq: use the r...
1224
  	} while (!list_empty(list));
320ae51fe   Jens Axboe   blk-mq: new multi...
1225

703fd1c0f   Jens Axboe   blk-mq: account h...
1226
  	hctx->dispatched[queued_to_index(queued)]++;
320ae51fe   Jens Axboe   blk-mq: new multi...
1227
1228
1229
1230
1231
  
  	/*
  	 * Any items that need requeuing? Stuff them into hctx->dispatch,
  	 * that is where we will continue on next queue run.
  	 */
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1232
  	if (!list_empty(list)) {
86ff7c2a8   Ming Lei   blk-mq: introduce...
1233
  		bool needs_restart;
d666ba98f   Jens Axboe   blk-mq: add mq_op...
1234
1235
1236
1237
1238
1239
1240
  		/*
  		 * If we didn't flush the entire list, we could have told
  		 * the driver there was more coming, but that turned out to
  		 * be a lie.
  		 */
  		if (q->mq_ops->commit_rqs)
  			q->mq_ops->commit_rqs(hctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1241
  		spin_lock(&hctx->lock);
74c77d6a4   Ming Lei   blk-mq: insert pa...
1242
  		list_splice_tail_init(list, &hctx->dispatch);
320ae51fe   Jens Axboe   blk-mq: new multi...
1243
  		spin_unlock(&hctx->lock);
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1244

9ba52e581   Shaohua Li   blk-mq: don't los...
1245
  		/*
b1a83ee0c   Ming Lei   blk-mq: order add...
1246
1247
1248
1249
1250
1251
1252
1253
1254
  		 * Order adding requests to hctx->dispatch and checking the
  		 * SCHED_RESTART flag. The pair of this smp_mb() is the one
  		 * in blk_mq_sched_restart(), so the restart code path won't
  		 * miss requests newly added to hctx->dispatch while
  		 * SCHED_RESTART is observed here.
  		 */
  		smp_mb();
  
  		/*
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1255
1256
1257
  		 * If SCHED_RESTART was set by the caller of this function and
  		 * it is no longer set that means that it was cleared by another
  		 * thread and hence that a queue rerun is needed.
9ba52e581   Shaohua Li   blk-mq: don't los...
1258
  		 *
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1259
1260
1261
1262
  		 * If 'no_tag' is set, that means that we failed getting
  		 * a driver tag with an I/O scheduler attached. If our dispatch
  		 * waitqueue is no longer active, ensure that we run the queue
  		 * AFTER adding our entries back to the list.
bd166ef18   Jens Axboe   blk-mq-sched: add...
1263
  		 *
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1264
1265
1266
1267
1268
1269
1270
  		 * If no I/O scheduler has been configured it is possible that
  		 * the hardware queue got stopped and restarted before requests
  		 * were pushed back onto the dispatch list. Rerun the queue to
  		 * avoid starvation. Notes:
  		 * - blk_mq_run_hw_queue() checks whether or not a queue has
  		 *   been stopped before rerunning a queue.
  		 * - Some but not all block drivers stop a queue before
fc17b6534   Christoph Hellwig   blk-mq: switch ->...
1271
  		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
710c785f8   Bart Van Assche   blk-mq: Clarify c...
1272
  		 *   and dm-rq.
86ff7c2a8   Ming Lei   blk-mq: introduce...
1273
1274
1275
1276
  		 *
  		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
  		 * bit is set, run queue after a delay to avoid IO stalls
  		 * that could otherwise occur if the queue is idle.
bd166ef18   Jens Axboe   blk-mq-sched: add...
1277
  		 */
86ff7c2a8   Ming Lei   blk-mq: introduce...
1278
1279
  		needs_restart = blk_mq_sched_needs_restart(hctx);
  		if (!needs_restart ||
eb619fdb2   Jens Axboe   blk-mq: fix issue...
1280
  		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1281
  			blk_mq_run_hw_queue(hctx, true);
86ff7c2a8   Ming Lei   blk-mq: introduce...
1282
1283
  		else if (needs_restart && (ret == BLK_STS_RESOURCE))
  			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
1f57f8d44   Jens Axboe   blk-mq: don't que...
1284

6e7687173   Ming Lei   blk-mq: dequeue r...
1285
  		blk_mq_update_dispatch_busy(hctx, true);
1f57f8d44   Jens Axboe   blk-mq: don't que...
1286
  		return false;
6e7687173   Ming Lei   blk-mq: dequeue r...
1287
1288
  	} else
  		blk_mq_update_dispatch_busy(hctx, false);
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1289

1f57f8d44   Jens Axboe   blk-mq: don't que...
1290
1291
1292
1293
1294
1295
  	/*
  	 * If the host/device is unable to accept more work, inform the
  	 * caller of that.
  	 */
  	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
  		return false;
93efe9817   Jens Axboe   blk-mq: include e...
1296
  	return (queued + errors) != 0;
f04c3df3e   Jens Axboe   blk-mq: abstract ...
1297
  }
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1298
1299
1300
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	int srcu_idx;
b7a71e66d   Jens Axboe   blk-mq: add warni...
1301
1302
1303
  	/*
  	 * We should be running this queue from one of the CPUs that
  	 * are mapped to it.
7df938fbc   Ming Lei   blk-mq: turn WARN...
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
  	 *
  	 * There are at least two related races now between setting
  	 * hctx->next_cpu from blk_mq_hctx_next_cpu() and running
  	 * __blk_mq_run_hw_queue():
  	 *
  	 * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(),
  	 *   but later becomes online; in that case this warning is
  	 *   completely harmless
  	 *
  	 * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(),
  	 *   but later becomes offline; then the warning can't be
  	 *   triggered, and we depend on the blk-mq timeout handler to
  	 *   handle requests dispatched to this hctx
b7a71e66d   Jens Axboe   blk-mq: add warni...
1317
  	 */
7df938fbc   Ming Lei   blk-mq: turn WARN...
1318
1319
1320
1321
1322
1323
1324
1325
  	if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
  		cpu_online(hctx->next_cpu)) {
  		printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n",
  			raw_smp_processor_id(),
  			cpumask_empty(hctx->cpumask) ? "inactive": "active");
  		dump_stack();
  	}
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1326

b7a71e66d   Jens Axboe   blk-mq: add warni...
1327
1328
1329
1330
1331
  	/*
  	 * We can't run the queue inline with ints disabled. Ensure that
  	 * we catch bad users of this early.
  	 */
  	WARN_ON_ONCE(in_interrupt());
04ced159c   Jens Axboe   blk-mq: move hctx...
1332
  	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
bf4907c05   Jens Axboe   blk-mq: fix sched...
1333

04ced159c   Jens Axboe   blk-mq: move hctx...
1334
1335
1336
  	hctx_lock(hctx, &srcu_idx);
  	blk_mq_sched_dispatch_requests(hctx);
  	hctx_unlock(hctx, srcu_idx);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1337
  }
f82ddf192   Ming Lei   blk-mq: introduce...
1338
1339
1340
1341
1342
1343
1344
1345
  static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
  {
  	int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
  
  	if (cpu >= nr_cpu_ids)
  		cpu = cpumask_first(hctx->cpumask);
  	return cpu;
  }
506e931f9   Jens Axboe   blk-mq: add basic...
1346
1347
1348
1349
1350
1351
1352
1353
  /*
   * It'd be great if the workqueue API had a way to pass
   * in a mask and had some smarts for more clever placement.
   * For now we just round-robin here, switching for every
   * BLK_MQ_CPU_WORK_BATCH queued items.
   */
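  /*
   * Illustration (assuming BLK_MQ_CPU_WORK_BATCH is 8, per blk-mq.h): the
   * same hctx->next_cpu is handed back for 8 consecutive calls, then the
   * code advances round-robin to the next online CPU in hctx->cpumask.
   */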
  static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
  {
7bed45954   Ming Lei   blk-mq: make sure...
1354
  	bool tried = false;
476f8c98a   Ming Lei   blk-mq: avoid to ...
1355
  	int next_cpu = hctx->next_cpu;
7bed45954   Ming Lei   blk-mq: make sure...
1356

b657d7e63   Christoph Hellwig   blk-mq: handle th...
1357
1358
  	if (hctx->queue->nr_hw_queues == 1)
  		return WORK_CPU_UNBOUND;
506e931f9   Jens Axboe   blk-mq: add basic...
1359
1360
  
  	if (--hctx->next_cpu_batch <= 0) {
7bed45954   Ming Lei   blk-mq: make sure...
1361
  select_cpu:
476f8c98a   Ming Lei   blk-mq: avoid to ...
1362
  		next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
20e4d8139   Christoph Hellwig   blk-mq: simplify ...
1363
  				cpu_online_mask);
506e931f9   Jens Axboe   blk-mq: add basic...
1364
  		if (next_cpu >= nr_cpu_ids)
f82ddf192   Ming Lei   blk-mq: introduce...
1365
  			next_cpu = blk_mq_first_mapped_cpu(hctx);
506e931f9   Jens Axboe   blk-mq: add basic...
1366
1367
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
7bed45954   Ming Lei   blk-mq: make sure...
1368
1369
1370
1371
  	/*
  	 * Do unbound schedule if we can't find an online CPU for this hctx,
  	 * and it should only happen in the path of handling CPU DEAD.
  	 */
476f8c98a   Ming Lei   blk-mq: avoid to ...
1372
  	if (!cpu_online(next_cpu)) {
7bed45954   Ming Lei   blk-mq: make sure...
1373
1374
1375
1376
1377
1378
1379
1380
1381
  		if (!tried) {
  			tried = true;
  			goto select_cpu;
  		}
  
  		/*
  		 * Make sure to re-select the CPU next time once the CPUs
  		 * in hctx->cpumask come back online.
  		 */
476f8c98a   Ming Lei   blk-mq: avoid to ...
1382
  		hctx->next_cpu = next_cpu;
7bed45954   Ming Lei   blk-mq: make sure...
1383
1384
1385
  		hctx->next_cpu_batch = 1;
  		return WORK_CPU_UNBOUND;
  	}
476f8c98a   Ming Lei   blk-mq: avoid to ...
1386
1387
1388
  
  	hctx->next_cpu = next_cpu;
  	return next_cpu;
506e931f9   Jens Axboe   blk-mq: add basic...
1389
  }
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1390
1391
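  /*
   * Run the hardware queue either inline, when the run is synchronous, the
   * hctx is not BLK_MQ_F_BLOCKING and the caller is already on a CPU mapped
   * to this hctx, or else by (re)scheduling hctx->run_work on kblockd after
   * the requested delay.
   */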
  static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
  					unsigned long msecs)
320ae51fe   Jens Axboe   blk-mq: new multi...
1392
  {
5435c023b   Bart Van Assche   blk-mq: Warn when...
1393
  	if (unlikely(blk_mq_hctx_stopped(hctx)))
320ae51fe   Jens Axboe   blk-mq: new multi...
1394
  		return;
1b792f2f9   Jens Axboe   blk-mq: add flag ...
1395
  	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1396
1397
  		int cpu = get_cpu();
  		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
398205b83   Paolo Bonzini   blk_mq: call pree...
1398
  			__blk_mq_run_hw_queue(hctx);
2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1399
  			put_cpu();
398205b83   Paolo Bonzini   blk_mq: call pree...
1400
1401
  			return;
  		}
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1402

2a90d4aae   Paolo Bonzini   blk-mq: use get_c...
1403
  		put_cpu();
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1404
  	}
398205b83   Paolo Bonzini   blk_mq: call pree...
1405

ae943d206   Bart Van Assche   blk-mq: Avoid tha...
1406
1407
  	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
  				    msecs_to_jiffies(msecs));
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1408
1409
1410
1411
1412
1413
1414
  }
  
  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
  {
  	__blk_mq_delay_run_hw_queue(hctx, true, msecs);
  }
  EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
79f720a75   Jens Axboe   blk-mq: only run ...
1415
  bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1416
  {
24f5a90f0   Ming Lei   blk-mq: quiesce q...
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
  	int srcu_idx;
  	bool need_run;
  
  	/*
  	 * When the queue is quiesced, we may be switching io schedulers,
  	 * updating nr_hw_queues, or doing other things, and we can't run the
  	 * queue any more; even __blk_mq_hctx_has_pending() can't be called safely.
  	 *
  	 * And queue will be rerun in blk_mq_unquiesce_queue() if it is
  	 * quiesced.
  	 */
04ced159c   Jens Axboe   blk-mq: move hctx...
1428
1429
1430
1431
  	hctx_lock(hctx, &srcu_idx);
  	need_run = !blk_queue_quiesced(hctx->queue) &&
  		blk_mq_hctx_has_pending(hctx);
  	hctx_unlock(hctx, srcu_idx);
24f5a90f0   Ming Lei   blk-mq: quiesce q...
1432
1433
  
  	if (need_run) {
79f720a75   Jens Axboe   blk-mq: only run ...
1434
1435
1436
1437
1438
  		__blk_mq_delay_run_hw_queue(hctx, async, 0);
  		return true;
  	}
  
  	return false;
320ae51fe   Jens Axboe   blk-mq: new multi...
1439
  }
5b7272729   Omar Sandoval   blk-mq: export he...
1440
  EXPORT_SYMBOL(blk_mq_run_hw_queue);
320ae51fe   Jens Axboe   blk-mq: new multi...
1441

b94ec2964   Mike Snitzer   blk-mq: export bl...
1442
  void blk_mq_run_hw_queues(struct request_queue *q, bool async)
320ae51fe   Jens Axboe   blk-mq: new multi...
1443
1444
1445
1446
1447
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
79f720a75   Jens Axboe   blk-mq: only run ...
1448
  		if (blk_mq_hctx_stopped(hctx))
320ae51fe   Jens Axboe   blk-mq: new multi...
1449
  			continue;
b94ec2964   Mike Snitzer   blk-mq: export bl...
1450
  		blk_mq_run_hw_queue(hctx, async);
320ae51fe   Jens Axboe   blk-mq: new multi...
1451
1452
  	}
  }
b94ec2964   Mike Snitzer   blk-mq: export bl...
1453
  EXPORT_SYMBOL(blk_mq_run_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1454

fd0014430   Bart Van Assche   blk-mq: Introduce...
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
  /**
   * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped
   * @q: request queue.
   *
   * The caller is responsible for serializing this function against
   * blk_mq_{start,stop}_hw_queue().
   */
  bool blk_mq_queue_stopped(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		if (blk_mq_hctx_stopped(hctx))
  			return true;
  
  	return false;
  }
  EXPORT_SYMBOL(blk_mq_queue_stopped);
39a70c76b   Ming Lei   blk-mq: clarify d...
1474
1475
1476
  /*
   * This function is often used by drivers to pause .queue_rq() when
   * there aren't enough resources or some conditions aren't satisfied, and
4d6062193   Bart Van Assche   block: Fix two co...
1477
   * BLK_STS_RESOURCE is usually returned.
39a70c76b   Ming Lei   blk-mq: clarify d...
1478
1479
1480
1481
1482
   *
   * We do not guarantee that dispatch can be drained or blocked
   * after blk_mq_stop_hw_queue() returns. Please use
   * blk_mq_quiesce_queue() for that requirement.
   */
2719aa217   Jens Axboe   blk-mq: don't use...
1483
1484
  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
641a9ed60   Ming Lei   Revert "blk-mq: d...
1485
  	cancel_delayed_work(&hctx->run_work);
280d45f6c   Christoph Hellwig   blk-mq: add blk_m...
1486

641a9ed60   Ming Lei   Revert "blk-mq: d...
1487
  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
2719aa217   Jens Axboe   blk-mq: don't use...
1488
  }
641a9ed60   Ming Lei   Revert "blk-mq: d...
1489
  EXPORT_SYMBOL(blk_mq_stop_hw_queue);
2719aa217   Jens Axboe   blk-mq: don't use...
1490

39a70c76b   Ming Lei   blk-mq: clarify d...
1491
1492
1493
  /*
   * This function is often used by drivers to pause .queue_rq() when
   * there aren't enough resources or some conditions aren't satisfied, and
4d6062193   Bart Van Assche   block: Fix two co...
1494
   * BLK_STS_RESOURCE is usually returned.
39a70c76b   Ming Lei   blk-mq: clarify d...
1495
1496
1497
1498
1499
   *
   * We do not guarantee that dispatch can be drained or blocked
   * after blk_mq_stop_hw_queues() returns. Please use
   * blk_mq_quiesce_queue() for that requirement.
   */
2719aa217   Jens Axboe   blk-mq: don't use...
1500
1501
  void blk_mq_stop_hw_queues(struct request_queue *q)
  {
641a9ed60   Ming Lei   Revert "blk-mq: d...
1502
1503
1504
1505
1506
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_stop_hw_queue(hctx);
280d45f6c   Christoph Hellwig   blk-mq: add blk_m...
1507
1508
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1509
1510
1511
  void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1512

0ffbce80c   Jens Axboe   blk-mq: blk_mq_st...
1513
  	blk_mq_run_hw_queue(hctx, false);
320ae51fe   Jens Axboe   blk-mq: new multi...
1514
1515
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queue);
2f2685565   Christoph Hellwig   blk-mq: add blk_m...
1516
1517
1518
1519
1520
1521
1522
1523
1524
  void blk_mq_start_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queues);
ae911c5e7   Jens Axboe   blk-mq: add blk_m...
1525
1526
1527
1528
1529
1530
1531
1532
1533
  void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  {
  	if (!blk_mq_hctx_stopped(hctx))
  		return;
  
  	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
  	blk_mq_run_hw_queue(hctx, async);
  }
  EXPORT_SYMBOL_GPL(blk_mq_start_stopped_hw_queue);
1b4a32585   Christoph Hellwig   blk-mq: add async...
1534
  void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
320ae51fe   Jens Axboe   blk-mq: new multi...
1535
1536
1537
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
ae911c5e7   Jens Axboe   blk-mq: add blk_m...
1538
1539
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_stopped_hw_queue(hctx, async);
320ae51fe   Jens Axboe   blk-mq: new multi...
1540
1541
  }
  EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
70f4db639   Christoph Hellwig   blk-mq: add blk_m...
1542
  static void blk_mq_run_work_fn(struct work_struct *work)
320ae51fe   Jens Axboe   blk-mq: new multi...
1543
1544
  {
  	struct blk_mq_hw_ctx *hctx;
9f9937379   Jens Axboe   blk-mq: unify hct...
1545
  	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
320ae51fe   Jens Axboe   blk-mq: new multi...
1546

21c6e939a   Jens Axboe   blk-mq: unify hct...
1547
  	/*
15fe8a90b   Ming Lei   blk-mq: remove bl...
1548
  	 * If we are stopped, don't run the queue.
21c6e939a   Jens Axboe   blk-mq: unify hct...
1549
  	 */
15fe8a90b   Ming Lei   blk-mq: remove bl...
1550
  	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
0196d6b40   Jianchao Wang   blk-mq: return wh...
1551
  		return;
7587a5ae7   Bart Van Assche   blk-mq: Introduce...
1552
1553
1554
  
  	__blk_mq_run_hw_queue(hctx);
  }
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1555
  static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1556
1557
  					    struct request *rq,
  					    bool at_head)
320ae51fe   Jens Axboe   blk-mq: new multi...
1558
  {
e57690fe0   Jens Axboe   blk-mq: don't ove...
1559
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1560
  	enum hctx_type type = hctx->type;
e57690fe0   Jens Axboe   blk-mq: don't ove...
1561

7b6078146   Bart Van Assche   blk-mq: Document ...
1562
  	lockdep_assert_held(&ctx->lock);
01b983c9f   Jens Axboe   blk-mq: add blktr...
1563
  	trace_block_rq_insert(hctx->queue, rq);
72a0a36e2   Christoph Hellwig   blk-mq: support a...
1564
  	if (at_head)
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1565
  		list_add(&rq->queuelist, &ctx->rq_lists[type]);
72a0a36e2   Christoph Hellwig   blk-mq: support a...
1566
  	else
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1567
  		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1568
  }
4bb659b15   Jens Axboe   blk-mq: implement...
1569

2c3ad6679   Jens Axboe   blk-mq: export so...
1570
1571
  void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
  			     bool at_head)
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1572
1573
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
7b6078146   Bart Van Assche   blk-mq: Document ...
1574
  	lockdep_assert_held(&ctx->lock);
e57690fe0   Jens Axboe   blk-mq: don't ove...
1575
  	__blk_mq_insert_req_list(hctx, rq, at_head);
320ae51fe   Jens Axboe   blk-mq: new multi...
1576
  	blk_mq_hctx_mark_pending(hctx, ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1577
  }
157f377be   Jens Axboe   block: directly i...
1578
1579
1580
1581
  /*
   * Should only be used carefully, when the caller knows we want to
   * bypass a potential IO scheduler on the target device.
   */
74c77d6a4   Ming Lei   blk-mq: insert pa...
1582
1583
  void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
  				  bool run_queue)
157f377be   Jens Axboe   block: directly i...
1584
  {
ea4f995ee   Jens Axboe   blk-mq: cache req...
1585
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
157f377be   Jens Axboe   block: directly i...
1586
1587
  
  	spin_lock(&hctx->lock);
74c77d6a4   Ming Lei   blk-mq: insert pa...
1588
1589
1590
1591
  	if (at_head)
  		list_add(&rq->queuelist, &hctx->dispatch);
  	else
  		list_add_tail(&rq->queuelist, &hctx->dispatch);
157f377be   Jens Axboe   block: directly i...
1592
  	spin_unlock(&hctx->lock);
b0850297c   Ming Lei   block: pass 'run_...
1593
1594
  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, false);
157f377be   Jens Axboe   block: directly i...
1595
  }
bd166ef18   Jens Axboe   blk-mq-sched: add...
1596
1597
  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
  			    struct list_head *list)
320ae51fe   Jens Axboe   blk-mq: new multi...
1598
1599
  
  {
3f0cedc7e   Ming Lei   blk-mq: use list_...
1600
  	struct request *rq;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1601
  	enum hctx_type type = hctx->type;
3f0cedc7e   Ming Lei   blk-mq: use list_...
1602

320ae51fe   Jens Axboe   blk-mq: new multi...
1603
1604
1605
1606
  	/*
  	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
  	 * offline now
  	 */
3f0cedc7e   Ming Lei   blk-mq: use list_...
1607
  	list_for_each_entry(rq, list, queuelist) {
e57690fe0   Jens Axboe   blk-mq: don't ove...
1608
  		BUG_ON(rq->mq_ctx != ctx);
3f0cedc7e   Ming Lei   blk-mq: use list_...
1609
  		trace_block_rq_insert(hctx->queue, rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
1610
  	}
3f0cedc7e   Ming Lei   blk-mq: use list_...
1611
1612
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
1613
  	list_splice_tail_init(list, &ctx->rq_lists[type]);
cfd0c552a   Ming Lei   blk-mq: mark ctx ...
1614
  	blk_mq_hctx_mark_pending(hctx, ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1615
  	spin_unlock(&ctx->lock);
320ae51fe   Jens Axboe   blk-mq: new multi...
1616
  }
3110fc796   Jens Axboe   blk-mq: improve p...
1617
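  /*
   * Sort comparator for plugged requests: order by software queue (mq_ctx),
   * then hardware queue (mq_hctx), then sector, so blk_mq_flush_plug_list()
   * below can hand each (hctx, ctx) batch over in one go.
   */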
  static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
320ae51fe   Jens Axboe   blk-mq: new multi...
1618
1619
1620
  {
  	struct request *rqa = container_of(a, struct request, queuelist);
  	struct request *rqb = container_of(b, struct request, queuelist);
3110fc796   Jens Axboe   blk-mq: improve p...
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
  	if (rqa->mq_ctx < rqb->mq_ctx)
  		return -1;
  	else if (rqa->mq_ctx > rqb->mq_ctx)
  		return 1;
  	else if (rqa->mq_hctx < rqb->mq_hctx)
  		return -1;
  	else if (rqa->mq_hctx > rqb->mq_hctx)
  		return 1;
  
  	return blk_rq_pos(rqa) > blk_rq_pos(rqb);
320ae51fe   Jens Axboe   blk-mq: new multi...
1631
1632
1633
1634
  }
  
  void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  {
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1635
  	struct blk_mq_hw_ctx *this_hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
1636
1637
1638
1639
  	struct blk_mq_ctx *this_ctx;
  	struct request_queue *this_q;
  	struct request *rq;
  	LIST_HEAD(list);
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1640
  	LIST_HEAD(rq_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1641
1642
1643
  	unsigned int depth;
  
  	list_splice_init(&plug->mq_list, &list);
ce5b009cf   Jens Axboe   block: improve lo...
1644
1645
  	if (plug->rq_count > 2 && plug->multiple_queues)
  		list_sort(NULL, &list, plug_rq_cmp);
320ae51fe   Jens Axboe   blk-mq: new multi...
1646

bcc816dfe   Dongli Zhang   blk-mq: do not re...
1647
  	plug->rq_count = 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
1648
  	this_q = NULL;
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1649
  	this_hctx = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1650
1651
1652
1653
1654
1655
1656
  	this_ctx = NULL;
  	depth = 0;
  
  	while (!list_empty(&list)) {
  		rq = list_entry_rq(list.next);
  		list_del_init(&rq->queuelist);
  		BUG_ON(!rq->q);
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1657
1658
  		if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) {
  			if (this_hctx) {
587562d0c   Ilya Dryomov   blk-mq: I/O and t...
1659
  				trace_block_unplug(this_q, depth, !from_schedule);
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1660
1661
  				blk_mq_sched_insert_requests(this_hctx, this_ctx,
  								&rq_list,
bd166ef18   Jens Axboe   blk-mq-sched: add...
1662
  								from_schedule);
320ae51fe   Jens Axboe   blk-mq: new multi...
1663
  			}
320ae51fe   Jens Axboe   blk-mq: new multi...
1664
  			this_q = rq->q;
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1665
1666
  			this_ctx = rq->mq_ctx;
  			this_hctx = rq->mq_hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
1667
1668
1669
1670
  			depth = 0;
  		}
  
  		depth++;
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1671
  		list_add_tail(&rq->queuelist, &rq_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1672
1673
1674
  	}
  
  	/*
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1675
1676
  	 * If 'this_hctx' is set, we know we have entries to complete
  	 * on 'rq_list'. Do those.
320ae51fe   Jens Axboe   blk-mq: new multi...
1677
  	 */
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1678
  	if (this_hctx) {
587562d0c   Ilya Dryomov   blk-mq: I/O and t...
1679
  		trace_block_unplug(this_q, depth, !from_schedule);
67cae4c94   Jens Axboe   blk-mq: cleanup a...
1680
  		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
bd166ef18   Jens Axboe   blk-mq-sched: add...
1681
  						from_schedule);
320ae51fe   Jens Axboe   blk-mq: new multi...
1682
1683
  	}
  }
14ccb66b3   Christoph Hellwig   block: remove the...
1684
1685
  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
  		unsigned int nr_segs)
320ae51fe   Jens Axboe   blk-mq: new multi...
1686
  {
f924cddeb   Christoph Hellwig   block: remove blk...
1687
1688
1689
1690
1691
  	if (bio->bi_opf & REQ_RAHEAD)
  		rq->cmd_flags |= REQ_FAILFAST_MASK;
  
  	rq->__sector = bio->bi_iter.bi_sector;
  	rq->write_hint = bio->bi_write_hint;
14ccb66b3   Christoph Hellwig   block: remove the...
1692
  	blk_rq_bio_prep(rq, bio, nr_segs);
4b570521b   Jens Axboe   blk-mq: request i...
1693

6e85eaf30   Jens Axboe   blk-mq: blk_accou...
1694
  	blk_account_io_start(rq, true);
320ae51fe   Jens Axboe   blk-mq: new multi...
1695
  }
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1696
1697
  static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
  					    struct request *rq,
be94f058f   Jens Axboe   blk-mq: use bd->l...
1698
  					    blk_qc_t *cookie, bool last)
f984df1f0   Shaohua Li   blk-mq: do limite...
1699
  {
f984df1f0   Shaohua Li   blk-mq: do limite...
1700
  	struct request_queue *q = rq->q;
f984df1f0   Shaohua Li   blk-mq: do limite...
1701
1702
  	struct blk_mq_queue_data bd = {
  		.rq = rq,
be94f058f   Jens Axboe   blk-mq: use bd->l...
1703
  		.last = last,
f984df1f0   Shaohua Li   blk-mq: do limite...
1704
  	};
bd166ef18   Jens Axboe   blk-mq-sched: add...
1705
  	blk_qc_t new_cookie;
f06345add   Jens Axboe   blk-mq: fixup typ...
1706
  	blk_status_t ret;
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
  
  	new_cookie = request_to_qc_t(hctx, rq);
  
  	/*
  	 * If the queue accepts the request we are done; on error the caller
  	 * may kill it. For any other (busy) status, just add it back to our
  	 * list as we previously would have done.
  	 */
  	ret = q->mq_ops->queue_rq(hctx, &bd);
  	switch (ret) {
  	case BLK_STS_OK:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1718
  		blk_mq_update_dispatch_busy(hctx, false);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1719
1720
1721
  		*cookie = new_cookie;
  		break;
  	case BLK_STS_RESOURCE:
86ff7c2a8   Ming Lei   blk-mq: introduce...
1722
  	case BLK_STS_DEV_RESOURCE:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1723
  		blk_mq_update_dispatch_busy(hctx, true);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1724
1725
1726
  		__blk_mq_requeue_request(rq);
  		break;
  	default:
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1727
  		blk_mq_update_dispatch_busy(hctx, false);
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1728
1729
1730
1731
1732
1733
  		*cookie = BLK_QC_T_NONE;
  		break;
  	}
  
  	return ret;
  }
fd9c40f64   Bart Van Assche   block: Revert v5....
1734
  static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1735
  						struct request *rq,
396eaf21e   Ming Lei   blk-mq: improve D...
1736
  						blk_qc_t *cookie,
fd9c40f64   Bart Van Assche   block: Revert v5....
1737
  						bool bypass_insert, bool last)
0f95549c0   Mike Snitzer   blk-mq: factor ou...
1738
1739
  {
  	struct request_queue *q = rq->q;
d964f04a8   Ming Lei   blk-mq: fix direc...
1740
  	bool run_queue = true;
23d4ee19e   Ming Lei   blk-mq: don't dis...
1741
  	/*
fd9c40f64   Bart Van Assche   block: Revert v5....
1742
  	 * RCU or SRCU read lock is needed before checking quiesced flag.
23d4ee19e   Ming Lei   blk-mq: don't dis...
1743
  	 *
fd9c40f64   Bart Van Assche   block: Revert v5....
1744
1745
1746
  	 * When the queue is stopped or quiesced, ignore 'bypass_insert' from
  	 * blk_mq_request_issue_directly() and return BLK_STS_OK to the caller,
  	 * so the driver does not try to dispatch again.
23d4ee19e   Ming Lei   blk-mq: don't dis...
1747
  	 */
fd9c40f64   Bart Van Assche   block: Revert v5....
1748
  	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
d964f04a8   Ming Lei   blk-mq: fix direc...
1749
  		run_queue = false;
fd9c40f64   Bart Van Assche   block: Revert v5....
1750
1751
  		bypass_insert = false;
  		goto insert;
d964f04a8   Ming Lei   blk-mq: fix direc...
1752
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1753

fd9c40f64   Bart Van Assche   block: Revert v5....
1754
1755
  	if (q->elevator && !bypass_insert)
  		goto insert;
2253efc85   Bart Van Assche   blk-mq: Move more...
1756

0bca799b9   Ming Lei   blk-mq: order get...
1757
  	if (!blk_mq_get_dispatch_budget(hctx))
fd9c40f64   Bart Van Assche   block: Revert v5....
1758
  		goto insert;
bd166ef18   Jens Axboe   blk-mq-sched: add...
1759

8ab6bb9ee   Ming Lei   blk-mq: cleanup b...
1760
  	if (!blk_mq_get_driver_tag(rq)) {
0bca799b9   Ming Lei   blk-mq: order get...
1761
  		blk_mq_put_dispatch_budget(hctx);
fd9c40f64   Bart Van Assche   block: Revert v5....
1762
  		goto insert;
88022d720   Ming Lei   blk-mq: don't han...
1763
  	}
de1482974   Ming Lei   blk-mq: introduce...
1764

fd9c40f64   Bart Van Assche   block: Revert v5....
1765
1766
1767
1768
  	return __blk_mq_issue_directly(hctx, rq, cookie, last);
  insert:
  	if (bypass_insert)
  		return BLK_STS_RESOURCE;
872a2b318   Ming Lei   blk-mq: insert re...
1769
  	blk_mq_sched_insert_request(rq, false, run_queue, false);
fd9c40f64   Bart Van Assche   block: Revert v5....
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
  	return BLK_STS_OK;
  }
  
  static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
  		struct request *rq, blk_qc_t *cookie)
  {
  	blk_status_t ret;
  	int srcu_idx;
  
  	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
  
  	hctx_lock(hctx, &srcu_idx);
  
  	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
  	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
74c77d6a4   Ming Lei   blk-mq: insert pa...
1785
  		blk_mq_request_bypass_insert(rq, false, true);
fd9c40f64   Bart Van Assche   block: Revert v5....
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
  	else if (ret != BLK_STS_OK)
  		blk_mq_end_request(rq, ret);
  
  	hctx_unlock(hctx, srcu_idx);
  }
  
  blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
  {
  	blk_status_t ret;
  	int srcu_idx;
  	blk_qc_t unused_cookie;
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  
  	hctx_lock(hctx, &srcu_idx);
  	ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
04ced159c   Jens Axboe   blk-mq: move hctx...
1801
  	hctx_unlock(hctx, srcu_idx);
7f556a44e   Jianchao Wang   blk-mq: refactor ...
1802
1803
  
  	return ret;
5eb6126e1   Christoph Hellwig   blk-mq: improve b...
1804
  }
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1805
1806
1807
1808
  void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
  		struct list_head *list)
  {
  	while (!list_empty(list)) {
fd9c40f64   Bart Van Assche   block: Revert v5....
1809
  		blk_status_t ret;
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1810
1811
1812
1813
  		struct request *rq = list_first_entry(list, struct request,
  				queuelist);
  
  		list_del_init(&rq->queuelist);
fd9c40f64   Bart Van Assche   block: Revert v5....
1814
1815
1816
1817
  		ret = blk_mq_request_issue_directly(rq, list_empty(list));
  		if (ret != BLK_STS_OK) {
  			if (ret == BLK_STS_RESOURCE ||
  					ret == BLK_STS_DEV_RESOURCE) {
74c77d6a4   Ming Lei   blk-mq: insert pa...
1818
  				blk_mq_request_bypass_insert(rq, false,
c616cbee9   Jens Axboe   blk-mq: punt fail...
1819
  							list_empty(list));
fd9c40f64   Bart Van Assche   block: Revert v5....
1820
1821
1822
1823
  				break;
  			}
  			blk_mq_end_request(rq, ret);
  		}
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1824
  	}
d666ba98f   Jens Axboe   blk-mq: add mq_op...
1825
1826
1827
1828
1829
1830
  
  	/*
  	 * If we didn't flush the entire list, we could have told
  	 * the driver there was more coming, but that turned out to
  	 * be a lie.
  	 */
fd9c40f64   Bart Van Assche   block: Revert v5....
1831
  	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
d666ba98f   Jens Axboe   blk-mq: add mq_op...
1832
  		hctx->queue->mq_ops->commit_rqs(hctx);
6ce3dd6ee   Ming Lei   blk-mq: issue dir...
1833
  }
ce5b009cf   Jens Axboe   block: improve lo...
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
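  /*
   * Queue @rq on the current plug. plug->multiple_queues is set when a
   * newly plugged request targets a different queue than the first one on
   * the list; blk_mq_flush_plug_list() uses it to decide whether the list
   * needs sorting before it is flushed.
   */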
  static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
  {
  	list_add_tail(&rq->queuelist, &plug->mq_list);
  	plug->rq_count++;
  	if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
  		struct request *tmp;
  
  		tmp = list_first_entry(&plug->mq_list, struct request,
  						queuelist);
  		if (tmp->q != rq->q)
  			plug->multiple_queues = true;
  	}
  }
dece16353   Jens Axboe   block: change ->m...
1847
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
1848
  {
ef295ecf0   Christoph Hellwig   block: better op ...
1849
  	const int is_sync = op_is_sync(bio->bi_opf);
f73f44eb0   Christoph Hellwig   block: add a op_i...
1850
  	const int is_flush_fua = op_is_flush(bio->bi_opf);
7809167da   Ming Lei   block: don't lose...
1851
  	struct blk_mq_alloc_data data = { .flags = 0};
07068d5b8   Jens Axboe   blk-mq: split mak...
1852
  	struct request *rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
1853
  	struct blk_plug *plug;
5b3f341f0   Shaohua Li   blk-mq: make plug...
1854
  	struct request *same_queue_rq = NULL;
14ccb66b3   Christoph Hellwig   block: remove the...
1855
  	unsigned int nr_segs;
7b371636f   Jens Axboe   blk-mq: return ta...
1856
  	blk_qc_t cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1857
1858
  
  	blk_queue_bounce(q, &bio);
14ccb66b3   Christoph Hellwig   block: remove the...
1859
  	__blk_queue_split(q, &bio, &nr_segs);
f36ea50ca   Wen Xiong   blk-mq: NVMe 512B...
1860

e23947bd7   Dmitry Monakhov   bio-integrity: fo...
1861
  	if (!bio_integrity_prep(bio))
dece16353   Jens Axboe   block: change ->m...
1862
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1863

87c279e61   Omar Sandoval   blk-mq: really fi...
1864
  	if (!is_flush_fua && !blk_queue_nomerges(q) &&
14ccb66b3   Christoph Hellwig   block: remove the...
1865
  	    blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
87c279e61   Omar Sandoval   blk-mq: really fi...
1866
  		return BLK_QC_T_NONE;
f984df1f0   Shaohua Li   blk-mq: do limite...
1867

14ccb66b3   Christoph Hellwig   block: remove the...
1868
  	if (blk_mq_sched_bio_merge(q, bio, nr_segs))
bd166ef18   Jens Axboe   blk-mq-sched: add...
1869
  		return BLK_QC_T_NONE;
d53375608   Christoph Hellwig   block: remove the...
1870
  	rq_qos_throttle(q, bio);
87760e5ee   Jens Axboe   block: hook up wr...
1871

7809167da   Ming Lei   block: don't lose...
1872
  	data.cmd_flags = bio->bi_opf;
f9afca4d3   Jens Axboe   blk-mq: pass in r...
1873
  	rq = blk_mq_get_request(q, bio, &data);
87760e5ee   Jens Axboe   block: hook up wr...
1874
  	if (unlikely(!rq)) {
c1c80384c   Josef Bacik   block: remove ext...
1875
  		rq_qos_cleanup(q, bio);
7b6620d7d   Jens Axboe   block: remove REQ...
1876
  		if (bio->bi_opf & REQ_NOWAIT)
03a07c92a   Goldwyn Rodrigues   block: return on ...
1877
  			bio_wouldblock_error(bio);
7b6620d7d   Jens Axboe   block: remove REQ...
1878
  		return BLK_QC_T_NONE;
87760e5ee   Jens Axboe   block: hook up wr...
1879
  	}
d6f1dda27   Xiaoguang Wang   blk-mq: place tra...
1880
  	trace_block_getrq(q, bio, bio->bi_opf);
c1c80384c   Josef Bacik   block: remove ext...
1881
  	rq_qos_track(q, rq, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
1882

fd2d33267   Jens Axboe   blk-mq: add suppo...
1883
  	cookie = request_to_qc_t(data.hctx, rq);
07068d5b8   Jens Axboe   blk-mq: split mak...
1884

970d168de   Bart Van Assche   blk-mq: simplify ...
1885
  	blk_mq_bio_to_request(rq, bio, nr_segs);
b49773e7b   Damien Le Moal   block: Disable wr...
1886
  	plug = blk_mq_plug(q, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
1887
  	if (unlikely(is_flush_fua)) {
923218f61   Ming Lei   blk-mq: don't all...
1888
1889
1890
  		/* bypass scheduler for flush rq */
  		blk_insert_flush(rq);
  		blk_mq_run_hw_queue(data.hctx, true);
3154df262   Ming Lei   blk-mq: apply nor...
1891
1892
  	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
  				!blk_queue_nonrot(q))) {
b2c5d16b7   Jens Axboe   blk-mq: use plug ...
1893
1894
1895
  		/*
  		 * Use plugging if we have a ->commit_rqs() hook as well, as
  		 * we know the driver uses bd->last in a smart fashion.
3154df262   Ming Lei   blk-mq: apply nor...
1896
1897
1898
  		 *
  		 * Use normal plugging if this disk is a slow HDD, as sequential
  		 * IO may benefit a lot from plug merging.
b2c5d16b7   Jens Axboe   blk-mq: use plug ...
1899
  		 */
5f0ed774e   Jens Axboe   block: sum reques...
1900
  		unsigned int request_count = plug->rq_count;
600271d90   Shaohua Li   blk-mq: immediate...
1901
  		struct request *last = NULL;
676d06077   Ming Lei   blk-mq: fix for t...
1902
  		if (!request_count)
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1903
  			trace_block_plug(q);
600271d90   Shaohua Li   blk-mq: immediate...
1904
1905
  		else
  			last = list_entry_rq(plug->mq_list.prev);
b094f89ca   Jens Axboe   blk-mq: fix calli...
1906

600271d90   Shaohua Li   blk-mq: immediate...
1907
1908
  		if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
  		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1909
1910
  			blk_flush_plug_list(plug, false);
  			trace_block_plug(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1911
  		}
b094f89ca   Jens Axboe   blk-mq: fix calli...
1912

ce5b009cf   Jens Axboe   block: improve lo...
1913
  		blk_add_rq_to_plug(plug, rq);
a12de1d42   Ming Lei   blk-mq: honor IO ...
1914
1915
  	} else if (q->elevator) {
  		blk_mq_sched_insert_request(rq, false, true, true);
2299722c4   Christoph Hellwig   blk-mq: split the...
1916
  	} else if (plug && !blk_queue_nomerges(q)) {
07068d5b8   Jens Axboe   blk-mq: split mak...
1917
  		/*
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
1918
  		 * We do limited plugging. If the bio can be merged, do that.
f984df1f0   Shaohua Li   blk-mq: do limite...
1919
1920
  		 * Otherwise the existing request in the plug list will be
  		 * issued. So the plug list will have one request at most.
2299722c4   Christoph Hellwig   blk-mq: split the...
1921
1922
  		 * The plug list might get flushed before this. If that happens,
  		 * the plug list is empty, and same_queue_rq is invalid.
07068d5b8   Jens Axboe   blk-mq: split mak...
1923
  		 */
2299722c4   Christoph Hellwig   blk-mq: split the...
1924
1925
  		if (list_empty(&plug->mq_list))
  			same_queue_rq = NULL;
4711b5731   Jens Axboe   blk-mq: fix failu...
1926
  		if (same_queue_rq) {
2299722c4   Christoph Hellwig   blk-mq: split the...
1927
  			list_del_init(&same_queue_rq->queuelist);
4711b5731   Jens Axboe   blk-mq: fix failu...
1928
1929
  			plug->rq_count--;
  		}
ce5b009cf   Jens Axboe   block: improve lo...
1930
  		blk_add_rq_to_plug(plug, rq);
ff3b74b8e   Yufen Yu   blk-mq: add trace...
1931
  		trace_block_plug(q);
2299722c4   Christoph Hellwig   blk-mq: split the...
1932

dad7a3be4   Ming Lei   blk-mq: pass corr...
1933
  		if (same_queue_rq) {
ea4f995ee   Jens Axboe   blk-mq: cache req...
1934
  			data.hctx = same_queue_rq->mq_hctx;
ff3b74b8e   Yufen Yu   blk-mq: add trace...
1935
  			trace_block_unplug(q, 1, true);
2299722c4   Christoph Hellwig   blk-mq: split the...
1936
  			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
fd9c40f64   Bart Van Assche   block: Revert v5....
1937
  					&cookie);
dad7a3be4   Ming Lei   blk-mq: pass corr...
1938
  		}
a12de1d42   Ming Lei   blk-mq: honor IO ...
1939
1940
  	} else if ((q->nr_hw_queues > 1 && is_sync) ||
  			!data.hctx->dispatch_busy) {
fd9c40f64   Bart Van Assche   block: Revert v5....
1941
  		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
ab42f35d9   Ming Lei   blk-mq: merge bio...
1942
  	} else {
8fa9f5564   huhai   blk-mq: remove re...
1943
  		blk_mq_sched_insert_request(rq, false, true, true);
ab42f35d9   Ming Lei   blk-mq: merge bio...
1944
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1945

7b371636f   Jens Axboe   blk-mq: return ta...
1946
  	return cookie;
320ae51fe   Jens Axboe   blk-mq: new multi...
1947
  }
cc71a6f43   Jens Axboe   blk-mq: abstract ...
1948
1949
  void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  		     unsigned int hctx_idx)
95363efde   Jens Axboe   blk-mq: allow blk...
1950
  {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1951
  	struct page *page;
320ae51fe   Jens Axboe   blk-mq: new multi...
1952

24d2f9030   Christoph Hellwig   blk-mq: split out...
1953
  	if (tags->rqs && set->ops->exit_request) {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1954
  		int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
1955

24d2f9030   Christoph Hellwig   blk-mq: split out...
1956
  		for (i = 0; i < tags->nr_tags; i++) {
2af8cbe30   Jens Axboe   blk-mq: split tag...
1957
1958
1959
  			struct request *rq = tags->static_rqs[i];
  
  			if (!rq)
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1960
  				continue;
d6296d39e   Christoph Hellwig   blk-mq: update ->...
1961
  			set->ops->exit_request(set, rq, hctx_idx);
2af8cbe30   Jens Axboe   blk-mq: split tag...
1962
  			tags->static_rqs[i] = NULL;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1963
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
1964
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1965

24d2f9030   Christoph Hellwig   blk-mq: split out...
1966
1967
  	while (!list_empty(&tags->page_list)) {
  		page = list_first_entry(&tags->page_list, struct page, lru);
6753471c0   Dave Hansen   blk-mq: uses page...
1968
  		list_del_init(&page->lru);
f75782e4e   Catalin Marinas   block: kmemleak: ...
1969
1970
  		/*
  		 * Remove kmemleak object previously allocated in
273938bf7   Raul E Rangel   block: fix functi...
1971
  		 * blk_mq_alloc_rqs().
f75782e4e   Catalin Marinas   block: kmemleak: ...
1972
1973
  		 */
  		kmemleak_free(page_address(page));
320ae51fe   Jens Axboe   blk-mq: new multi...
1974
1975
  		__free_pages(page, page->private);
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
1976
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1977

cc71a6f43   Jens Axboe   blk-mq: abstract ...
1978
1979
  void blk_mq_free_rq_map(struct blk_mq_tags *tags)
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1980
  	kfree(tags->rqs);
cc71a6f43   Jens Axboe   blk-mq: abstract ...
1981
  	tags->rqs = NULL;
2af8cbe30   Jens Axboe   blk-mq: split tag...
1982
1983
  	kfree(tags->static_rqs);
  	tags->static_rqs = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1984

24d2f9030   Christoph Hellwig   blk-mq: split out...
1985
  	blk_mq_free_tags(tags);
320ae51fe   Jens Axboe   blk-mq: new multi...
1986
  }
cc71a6f43   Jens Axboe   blk-mq: abstract ...
1987
1988
1989
1990
  struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
  					unsigned int hctx_idx,
  					unsigned int nr_tags,
  					unsigned int reserved_tags)
320ae51fe   Jens Axboe   blk-mq: new multi...
1991
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1992
  	struct blk_mq_tags *tags;
59f082e46   Shaohua Li   blk-mq: allocate ...
1993
  	int node;
320ae51fe   Jens Axboe   blk-mq: new multi...
1994

7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
1995
  	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
59f082e46   Shaohua Li   blk-mq: allocate ...
1996
1997
1998
1999
  	if (node == NUMA_NO_NODE)
  		node = set->numa_node;
  
  	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
24391c0dc   Shaohua Li   blk-mq: add tag a...
2000
  				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
24d2f9030   Christoph Hellwig   blk-mq: split out...
2001
2002
  	if (!tags)
  		return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2003

590b5b7d8   Kees Cook   treewide: kzalloc...
2004
  	tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2005
  				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
59f082e46   Shaohua Li   blk-mq: allocate ...
2006
  				 node);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2007
2008
2009
2010
  	if (!tags->rqs) {
  		blk_mq_free_tags(tags);
  		return NULL;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2011

590b5b7d8   Kees Cook   treewide: kzalloc...
2012
2013
2014
  	tags->static_rqs = kcalloc_node(nr_tags, sizeof(struct request *),
  					GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
  					node);
2af8cbe30   Jens Axboe   blk-mq: split tag...
2015
2016
2017
2018
2019
  	if (!tags->static_rqs) {
  		kfree(tags->rqs);
  		blk_mq_free_tags(tags);
  		return NULL;
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2020
2021
2022
2023
2024
2025
2026
  	return tags;
  }
  
  static size_t order_to_size(unsigned int order)
  {
  	return (size_t)PAGE_SIZE << order;
  }
1d9bd5161   Tejun Heo   blk-mq: replace t...
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
  static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
  			       unsigned int hctx_idx, int node)
  {
  	int ret;
  
  	if (set->ops->init_request) {
  		ret = set->ops->init_request(set, rq, hctx_idx, node);
  		if (ret)
  			return ret;
  	}
12f5b9314   Keith Busch   blk-mq: Remove ge...
2037
  	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
1d9bd5161   Tejun Heo   blk-mq: replace t...
2038
2039
  	return 0;
  }
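  
  /*
   * Example (editorial sketch, not part of blk-mq): a driver-side
   * ->init_request()/->exit_request() pair of the kind invoked from
   * blk_mq_init_request() above and blk_mq_free_rqs() earlier.  The
   * per-request payload lives in the cmd_size area right behind
   * struct request and is reached with blk_mq_rq_to_pdu().
   * "struct my_cmd" and the 64-byte bounce buffer are hypothetical.
   */
  struct my_cmd {
  	void	*buf;
  };
  
  static int my_init_request(struct blk_mq_tag_set *set, struct request *rq,
  			   unsigned int hctx_idx, unsigned int numa_node)
  {
  	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);
  
  	/* one-time per-request setup, done when the tag map is allocated */
  	cmd->buf = kzalloc_node(64, GFP_KERNEL, numa_node);
  	return cmd->buf ? 0 : -ENOMEM;
  }
  
  static void my_exit_request(struct blk_mq_tag_set *set, struct request *rq,
  			    unsigned int hctx_idx)
  {
  	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);
  
  	kfree(cmd->buf);
  }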
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2040
2041
2042
2043
2044
  int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
  		     unsigned int hctx_idx, unsigned int depth)
  {
  	unsigned int i, j, entries_per_page, max_order = 4;
  	size_t rq_size, left;
59f082e46   Shaohua Li   blk-mq: allocate ...
2045
  	int node;
7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2046
  	node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], hctx_idx);
59f082e46   Shaohua Li   blk-mq: allocate ...
2047
2048
  	if (node == NUMA_NO_NODE)
  		node = set->numa_node;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2049
2050
  
  	INIT_LIST_HEAD(&tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2051
2052
2053
2054
  	/*
  	 * rq_size is the size of the request plus driver payload, rounded
  	 * to the cacheline size
  	 */
24d2f9030   Christoph Hellwig   blk-mq: split out...
2055
  	rq_size = round_up(sizeof(struct request) + set->cmd_size,
320ae51fe   Jens Axboe   blk-mq: new multi...
2056
  				cache_line_size());
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2057
  	left = rq_size * depth;
320ae51fe   Jens Axboe   blk-mq: new multi...
2058

cc71a6f43   Jens Axboe   blk-mq: abstract ...
2059
  	for (i = 0; i < depth; ) {
320ae51fe   Jens Axboe   blk-mq: new multi...
2060
2061
2062
2063
  		int this_order = max_order;
  		struct page *page;
  		int to_do;
  		void *p;
b3a834b15   Bartlomiej Zolnierkiewicz   blk-mq: fix undef...
2064
  		while (this_order && left < order_to_size(this_order - 1))
320ae51fe   Jens Axboe   blk-mq: new multi...
2065
2066
2067
  			this_order--;
  
  		do {
59f082e46   Shaohua Li   blk-mq: allocate ...
2068
  			page = alloc_pages_node(node,
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2069
  				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
a51644054   Jens Axboe   blk-mq: scale dep...
2070
  				this_order);
320ae51fe   Jens Axboe   blk-mq: new multi...
2071
2072
2073
2074
2075
2076
2077
2078
2079
  			if (page)
  				break;
  			if (!this_order--)
  				break;
  			if (order_to_size(this_order) < rq_size)
  				break;
  		} while (1);
  
  		if (!page)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2080
  			goto fail;
320ae51fe   Jens Axboe   blk-mq: new multi...
2081
2082
  
  		page->private = this_order;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2083
  		list_add_tail(&page->lru, &tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2084
2085
  
  		p = page_address(page);
f75782e4e   Catalin Marinas   block: kmemleak: ...
2086
2087
2088
2089
  		/*
  		 * Allow kmemleak to scan these pages as they contain pointers
  		 * to additional allocations like via ops->init_request().
  		 */
36e1f3d10   Gabriel Krisman Bertazi   blk-mq: Avoid mem...
2090
  		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
320ae51fe   Jens Axboe   blk-mq: new multi...
2091
  		entries_per_page = order_to_size(this_order) / rq_size;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2092
  		to_do = min(entries_per_page, depth - i);
320ae51fe   Jens Axboe   blk-mq: new multi...
2093
2094
  		left -= to_do * rq_size;
  		for (j = 0; j < to_do; j++) {
2af8cbe30   Jens Axboe   blk-mq: split tag...
2095
2096
2097
  			struct request *rq = p;
  
  			tags->static_rqs[i] = rq;
1d9bd5161   Tejun Heo   blk-mq: replace t...
2098
2099
2100
  			if (blk_mq_init_request(set, rq, hctx_idx, node)) {
  				tags->static_rqs[i] = NULL;
  				goto fail;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
2101
  			}
320ae51fe   Jens Axboe   blk-mq: new multi...
2102
2103
2104
2105
  			p += rq_size;
  			i++;
  		}
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2106
  	return 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
2107

24d2f9030   Christoph Hellwig   blk-mq: split out...
2108
  fail:
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2109
2110
  	blk_mq_free_rqs(set, tags, hctx_idx);
  	return -ENOMEM;
320ae51fe   Jens Axboe   blk-mq: new multi...
2111
  }
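  
  /*
   * Worked example (editorial, figures are illustrative assumptions only):
   * with 64-byte cache lines, if sizeof(struct request) + set->cmd_size
   * rounds up to rq_size = 576 bytes and depth = 256, then left = 147456
   * bytes.  The first allocation tries this_order = 4 (64 KiB), which holds
   * entries_per_page = 65536 / 576 = 113 requests; later allocations shrink
   * the order as "left" drops, until all 256 static_rqs slots are populated
   * or a page allocation fails.
   */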
e57690fe0   Jens Axboe   blk-mq: don't ove...
2112
2113
2114
2115
2116
  /*
   * 'cpu' is going away. splice any existing rq_list entries from this
   * software queue to the hw queue dispatch list, and ensure that it
   * gets run.
   */
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2117
  static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
484b4061e   Jens Axboe   blk-mq: save memo...
2118
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2119
  	struct blk_mq_hw_ctx *hctx;
484b4061e   Jens Axboe   blk-mq: save memo...
2120
2121
  	struct blk_mq_ctx *ctx;
  	LIST_HEAD(tmp);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2122
  	enum hctx_type type;
484b4061e   Jens Axboe   blk-mq: save memo...
2123

9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2124
  	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
e57690fe0   Jens Axboe   blk-mq: don't ove...
2125
  	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2126
  	type = hctx->type;
484b4061e   Jens Axboe   blk-mq: save memo...
2127
2128
  
  	spin_lock(&ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2129
2130
  	if (!list_empty(&ctx->rq_lists[type])) {
  		list_splice_init(&ctx->rq_lists[type], &tmp);
484b4061e   Jens Axboe   blk-mq: save memo...
2131
2132
2133
2134
2135
  		blk_mq_hctx_clear_pending(hctx, ctx);
  	}
  	spin_unlock(&ctx->lock);
  
  	if (list_empty(&tmp))
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2136
  		return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
2137

e57690fe0   Jens Axboe   blk-mq: don't ove...
2138
2139
2140
  	spin_lock(&hctx->lock);
  	list_splice_tail_init(&tmp, &hctx->dispatch);
  	spin_unlock(&hctx->lock);
484b4061e   Jens Axboe   blk-mq: save memo...
2141
2142
  
  	blk_mq_run_hw_queue(hctx, true);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2143
  	return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
2144
  }
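  
  /*
   * Editorial note: the CPUHP_BLK_MQ_DEAD callback above is registered once
   * at boot from this file's initcall (outside this excerpt), roughly as in
   * the sketch below; each hctx then attaches itself to that state with
   * cpuhp_state_add_instance_nocalls() in blk_mq_init_hctx().
   */
  #if 0	/* illustrative only */
  	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
  				blk_mq_hctx_notify_dead);
  #endif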
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2145
  static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
484b4061e   Jens Axboe   blk-mq: save memo...
2146
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2147
2148
  	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
  					    &hctx->cpuhp_dead);
484b4061e   Jens Axboe   blk-mq: save memo...
2149
  }
c3b4afca7   Ming Lei   blk-mq: free hctx...
2150
  /* hctx->ctxs will be freed in queue's release handler */
08e98fc60   Ming Lei   blk-mq: handle fa...
2151
2152
2153
2154
  static void blk_mq_exit_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
8ab0b7dc7   Ming Lei   blk-mq: fix kerne...
2155
2156
  	if (blk_mq_hw_queue_mapped(hctx))
  		blk_mq_tag_idle(hctx);
08e98fc60   Ming Lei   blk-mq: handle fa...
2157

f70ced091   Ming Lei   blk-mq: support p...
2158
  	if (set->ops->exit_request)
d6296d39e   Christoph Hellwig   blk-mq: update ->...
2159
  		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
f70ced091   Ming Lei   blk-mq: support p...
2160

08e98fc60   Ming Lei   blk-mq: handle fa...
2161
2162
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2163
  	blk_mq_remove_cpuhp(hctx);
2f8f1336a   Ming Lei   blk-mq: always fr...
2164
2165
2166
2167
  
  	spin_lock(&q->unused_hctx_lock);
  	list_add(&hctx->hctx_list, &q->unused_hctx_list);
  	spin_unlock(&q->unused_hctx_lock);
08e98fc60   Ming Lei   blk-mq: handle fa...
2168
  }
624dbe475   Ming Lei   blk-mq: avoid cod...
2169
2170
2171
2172
2173
2174
2175
2176
2177
  static void blk_mq_exit_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set, int nr_queue)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (i == nr_queue)
  			break;
477e19ded   Jianchao Wang   blk-mq: adjust de...
2178
  		blk_mq_debugfs_unregister_hctx(hctx);
08e98fc60   Ming Lei   blk-mq: handle fa...
2179
  		blk_mq_exit_hctx(q, set, hctx, i);
624dbe475   Ming Lei   blk-mq: avoid cod...
2180
  	}
624dbe475   Ming Lei   blk-mq: avoid cod...
2181
  }
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
  static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
  {
  	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
  
  	BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
  			   __alignof__(struct blk_mq_hw_ctx)) !=
  		     sizeof(struct blk_mq_hw_ctx));
  
  	if (tag_set->flags & BLK_MQ_F_BLOCKING)
  		hw_ctx_size += sizeof(struct srcu_struct);
  
  	return hw_ctx_size;
  }
08e98fc60   Ming Lei   blk-mq: handle fa...
2195
2196
2197
  static int blk_mq_init_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
2198
  {
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2199
2200
2201
2202
2203
2204
2205
2206
2207
  	hctx->queue_num = hctx_idx;
  
  	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
  
  	hctx->tags = set->tags[hctx_idx];
  
  	if (set->ops->init_hctx &&
  	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
  		goto unregister_cpu_notifier;
08e98fc60   Ming Lei   blk-mq: handle fa...
2208

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
  	if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
  				hctx->numa_node))
  		goto exit_hctx;
  	return 0;
  
   exit_hctx:
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
   unregister_cpu_notifier:
  	blk_mq_remove_cpuhp(hctx);
  	return -1;
  }
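  
  /*
   * Example (editorial sketch): the ->init_hctx()/->exit_hctx() hooks called
   * from blk_mq_init_hctx() above and blk_mq_exit_hctx() earlier are where a
   * driver usually binds its own per-hardware-queue state, typically through
   * hctx->driver_data.  "struct my_hwq" and "struct my_hba" are hypothetical.
   */
  struct my_hwq {
  	int	qid;
  };
  
  struct my_hba {
  	struct my_hwq	*hw_queues;
  };
  
  static int my_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
  			unsigned int hctx_idx)
  {
  	struct my_hba *hba = driver_data;	/* from set->driver_data */
  
  	hctx->driver_data = &hba->hw_queues[hctx_idx];
  	return 0;
  }
  
  static void my_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
  	hctx->driver_data = NULL;
  }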
  
  static struct blk_mq_hw_ctx *
  blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
  		int node)
  {
  	struct blk_mq_hw_ctx *hctx;
  	gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
  
  	hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
  	if (!hctx)
  		goto fail_alloc_hctx;
  
  	if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
  		goto free_hctx;
  
  	atomic_set(&hctx->nr_active, 0);
08e98fc60   Ming Lei   blk-mq: handle fa...
2237
  	if (node == NUMA_NO_NODE)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2238
2239
  		node = set->numa_node;
  	hctx->numa_node = node;
08e98fc60   Ming Lei   blk-mq: handle fa...
2240

9f9937379   Jens Axboe   blk-mq: unify hct...
2241
  	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
08e98fc60   Ming Lei   blk-mq: handle fa...
2242
2243
2244
  	spin_lock_init(&hctx->lock);
  	INIT_LIST_HEAD(&hctx->dispatch);
  	hctx->queue = q;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2245
  	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
08e98fc60   Ming Lei   blk-mq: handle fa...
2246

2f8f1336a   Ming Lei   blk-mq: always fr...
2247
  	INIT_LIST_HEAD(&hctx->hctx_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
2248
  	/*
08e98fc60   Ming Lei   blk-mq: handle fa...
2249
2250
  	 * Allocate space for all possible cpus to avoid allocation at
  	 * runtime
320ae51fe   Jens Axboe   blk-mq: new multi...
2251
  	 */
d904bfa79   Johannes Thumshirn   block/blk-mq.c: u...
2252
  	hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2253
  			gfp, node);
08e98fc60   Ming Lei   blk-mq: handle fa...
2254
  	if (!hctx->ctxs)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2255
  		goto free_cpumask;
320ae51fe   Jens Axboe   blk-mq: new multi...
2256

5b202853f   Jianchao Wang   blk-mq: change gf...
2257
  	if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2258
  				gfp, node))
08e98fc60   Ming Lei   blk-mq: handle fa...
2259
  		goto free_ctxs;
08e98fc60   Ming Lei   blk-mq: handle fa...
2260
  	hctx->nr_ctx = 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
2261

5815839b3   Ming Lei   blk-mq: introduce...
2262
  	spin_lock_init(&hctx->dispatch_wait_lock);
eb619fdb2   Jens Axboe   blk-mq: fix issue...
2263
2264
  	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
  	INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
5b202853f   Jianchao Wang   blk-mq: change gf...
2265
  	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2266
  			gfp);
f70ced091   Ming Lei   blk-mq: support p...
2267
  	if (!hctx->fq)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2268
  		goto free_bitmap;
320ae51fe   Jens Axboe   blk-mq: new multi...
2269

6a83e74d2   Bart Van Assche   blk-mq: Introduce...
2270
  	if (hctx->flags & BLK_MQ_F_BLOCKING)
05707b64a   Tejun Heo   blk-mq: rename bl...
2271
  		init_srcu_struct(hctx->srcu);
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2272
  	blk_mq_hctx_kobj_init(hctx);
6a83e74d2   Bart Van Assche   blk-mq: Introduce...
2273

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2274
  	return hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
2275

08e98fc60   Ming Lei   blk-mq: handle fa...
2276
   free_bitmap:
88459642c   Omar Sandoval   blk-mq: abstract ...
2277
  	sbitmap_free(&hctx->ctx_map);
08e98fc60   Ming Lei   blk-mq: handle fa...
2278
2279
   free_ctxs:
  	kfree(hctx->ctxs);
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2280
2281
2282
2283
2284
2285
   free_cpumask:
  	free_cpumask_var(hctx->cpumask);
   free_hctx:
  	kfree(hctx);
   fail_alloc_hctx:
  	return NULL;
08e98fc60   Ming Lei   blk-mq: handle fa...
2286
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2287

320ae51fe   Jens Axboe   blk-mq: new multi...
2288
2289
2290
  static void blk_mq_init_cpu_queues(struct request_queue *q,
  				   unsigned int nr_hw_queues)
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2291
2292
  	struct blk_mq_tag_set *set = q->tag_set;
  	unsigned int i, j;
320ae51fe   Jens Axboe   blk-mq: new multi...
2293
2294
2295
2296
  
  	for_each_possible_cpu(i) {
  		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
  		struct blk_mq_hw_ctx *hctx;
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2297
  		int k;
320ae51fe   Jens Axboe   blk-mq: new multi...
2298

320ae51fe   Jens Axboe   blk-mq: new multi...
2299
2300
  		__ctx->cpu = i;
  		spin_lock_init(&__ctx->lock);
c16d6b5a9   Ming Lei   blk-mq: fix dispa...
2301
2302
  		for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
  			INIT_LIST_HEAD(&__ctx->rq_lists[k]);
320ae51fe   Jens Axboe   blk-mq: new multi...
2303
  		__ctx->queue = q;
320ae51fe   Jens Axboe   blk-mq: new multi...
2304
2305
2306
2307
  		/*
  		 * Set local node, IFF we have more than one hw queue. If
  		 * not, we remain on the home node of the device
  		 */
b3c661b15   Jens Axboe   blk-mq: support m...
2308
2309
2310
2311
2312
  		for (j = 0; j < set->nr_maps; j++) {
  			hctx = blk_mq_map_queue_type(q, j, i);
  			if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
  				hctx->numa_node = local_memory_node(cpu_to_node(i));
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
2313
2314
  	}
  }
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
  static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
  {
  	int ret = 0;
  
  	set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
  					set->queue_depth, set->reserved_tags);
  	if (!set->tags[hctx_idx])
  		return false;
  
  	ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
  				set->queue_depth);
  	if (!ret)
  		return true;
  
  	blk_mq_free_rq_map(set->tags[hctx_idx]);
  	set->tags[hctx_idx] = NULL;
  	return false;
  }
  
  static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
  					 unsigned int hctx_idx)
  {
4e6db0f21   Dan Carpenter   blk-mq: Add a NUL...
2337
  	if (set->tags && set->tags[hctx_idx]) {
bd166ef18   Jens Axboe   blk-mq-sched: add...
2338
2339
2340
2341
  		blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
  		blk_mq_free_rq_map(set->tags[hctx_idx]);
  		set->tags[hctx_idx] = NULL;
  	}
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2342
  }
4b855ad37   Christoph Hellwig   blk-mq: Create hc...
2343
  static void blk_mq_map_swqueue(struct request_queue *q)
320ae51fe   Jens Axboe   blk-mq: new multi...
2344
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2345
  	unsigned int i, j, hctx_idx;
320ae51fe   Jens Axboe   blk-mq: new multi...
2346
2347
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
2a34c0872   Ming Lei   blk-mq: fix CPU h...
2348
  	struct blk_mq_tag_set *set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
2349
2350
  
  	queue_for_each_hw_ctx(q, hctx, i) {
e4043dcf3   Jens Axboe   blk-mq: ensure th...
2351
  		cpumask_clear(hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
2352
  		hctx->nr_ctx = 0;
d416c92c5   huhai   blk-mq: clear hct...
2353
  		hctx->dispatch_from = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2354
2355
2356
  	}
  
  	/*
4b855ad37   Christoph Hellwig   blk-mq: Create hc...
2357
  	 * Map software to hardware queues.
4412efecf   Ming Lei   Revert "blk-mq: r...
2358
2359
  	 *
  	 * If the cpu isn't present, the cpu is mapped to the first hctx.
320ae51fe   Jens Axboe   blk-mq: new multi...
2360
  	 */
20e4d8139   Christoph Hellwig   blk-mq: simplify ...
2361
  	for_each_possible_cpu(i) {
4412efecf   Ming Lei   Revert "blk-mq: r...
2362

897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
2363
  		ctx = per_cpu_ptr(q->queue_ctx, i);
b3c661b15   Jens Axboe   blk-mq: support m...
2364
  		for (j = 0; j < set->nr_maps; j++) {
bb94aea14   Jianchao Wang   blk-mq: save defa...
2365
2366
2367
  			if (!set->map[j].nr_queues) {
  				ctx->hctxs[j] = blk_mq_map_queue_type(q,
  						HCTX_TYPE_DEFAULT, i);
e5edd5f29   Ming Lei   blk-mq: skip zero...
2368
  				continue;
bb94aea14   Jianchao Wang   blk-mq: save defa...
2369
  			}
201219691   Ming Lei   block: alloc map ...
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
  			hctx_idx = set->map[j].mq_map[i];
  			/* unmapped hw queue can be remapped after CPU topo changed */
  			if (!set->tags[hctx_idx] &&
  			    !__blk_mq_alloc_rq_map(set, hctx_idx)) {
  				/*
  				 * If tags initialization fails for some hctx,
  				 * that hctx won't be brought online.  In this
  				 * case, remap the current ctx to hctx[0], which
  				 * is guaranteed to always have tags allocated.
  				 */
  				set->map[j].mq_map[i] = 0;
  			}
e5edd5f29   Ming Lei   blk-mq: skip zero...
2382

b3c661b15   Jens Axboe   blk-mq: support m...
2383
  			hctx = blk_mq_map_queue_type(q, j, i);
8ccdf4a37   Jianchao Wang   blk-mq: save queu...
2384
  			ctx->hctxs[j] = hctx;
b3c661b15   Jens Axboe   blk-mq: support m...
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
  			/*
  			 * If the CPU is already set in the mask, then we've
  			 * mapped this one already. This can happen if
  			 * devices share queues across queue maps.
  			 */
  			if (cpumask_test_cpu(i, hctx->cpumask))
  				continue;
  
  			cpumask_set_cpu(i, hctx->cpumask);
  			hctx->type = j;
  			ctx->index_hw[hctx->type] = hctx->nr_ctx;
  			hctx->ctxs[hctx->nr_ctx++] = ctx;
  
  			/*
  			 * If the nr_ctx type overflows, we have exceeded the
  			 * amount of sw queues we can support.
  			 */
  			BUG_ON(!hctx->nr_ctx);
  		}
bb94aea14   Jianchao Wang   blk-mq: save defa...
2404
2405
2406
2407
  
  		for (; j < HCTX_MAX_TYPES; j++)
  			ctx->hctxs[j] = blk_mq_map_queue_type(q,
  					HCTX_TYPE_DEFAULT, i);
320ae51fe   Jens Axboe   blk-mq: new multi...
2408
  	}
506e931f9   Jens Axboe   blk-mq: add basic...
2409
2410
  
  	queue_for_each_hw_ctx(q, hctx, i) {
4412efecf   Ming Lei   Revert "blk-mq: r...
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
  		/*
  		 * If no software queues are mapped to this hardware queue,
  		 * disable it and free the request entries.
  		 */
  		if (!hctx->nr_ctx) {
  			/* Never unmap queue 0.  We need it as a
  			 * fallback in case allocation fails for
  			 * a new remap.
  			 */
  			if (i && set->tags[i])
  				blk_mq_free_map_and_requests(set, i);
  
  			hctx->tags = NULL;
  			continue;
  		}
484b4061e   Jens Axboe   blk-mq: save memo...
2426

2a34c0872   Ming Lei   blk-mq: fix CPU h...
2427
2428
  		hctx->tags = set->tags[i];
  		WARN_ON(!hctx->tags);
484b4061e   Jens Axboe   blk-mq: save memo...
2429
  		/*
889fa31f0   Chong Yuan   blk-mq: reduce un...
2430
2431
2432
2433
  		 * Set the map size to the number of mapped software queues.
  		 * This is more accurate and more efficient than looping
  		 * over all possibly mapped software queues.
  		 */
88459642c   Omar Sandoval   blk-mq: abstract ...
2434
  		sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
889fa31f0   Chong Yuan   blk-mq: reduce un...
2435
2436
  
  		/*
484b4061e   Jens Axboe   blk-mq: save memo...
2437
2438
  		 * Initialize batch roundrobin counts
  		 */
f82ddf192   Ming Lei   blk-mq: introduce...
2439
  		hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
506e931f9   Jens Axboe   blk-mq: add basic...
2440
2441
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2442
  }
8e8320c93   Jens Axboe   blk-mq: fix perfo...
2443
2444
2445
2446
  /*
   * Caller needs to ensure that we're either frozen/quiesced, or that
   * the queue isn't live yet.
   */
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2447
  static void queue_set_hctx_shared(struct request_queue *q, bool shared)
0d2602ca3   Jens Axboe   blk-mq: improve s...
2448
2449
  {
  	struct blk_mq_hw_ctx *hctx;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2450
  	int i;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2451
  	queue_for_each_hw_ctx(q, hctx, i) {
97889f9ac   Ming Lei   blk-mq: remove sy...
2452
  		if (shared)
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2453
  			hctx->flags |= BLK_MQ_F_TAG_SHARED;
97889f9ac   Ming Lei   blk-mq: remove sy...
2454
  		else
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2455
2456
2457
  			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
  	}
  }
8e8320c93   Jens Axboe   blk-mq: fix perfo...
2458
2459
  static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
  					bool shared)
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2460
2461
  {
  	struct request_queue *q;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2462

705cda97e   Bart Van Assche   blk-mq: Make it s...
2463
  	lockdep_assert_held(&set->tag_list_lock);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2464
2465
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_freeze_queue(q);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2466
  		queue_set_hctx_shared(q, shared);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2467
2468
2469
2470
2471
2472
2473
  		blk_mq_unfreeze_queue(q);
  	}
  }
  
  static void blk_mq_del_queue_tag_set(struct request_queue *q)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
0d2602ca3   Jens Axboe   blk-mq: improve s...
2474
  	mutex_lock(&set->tag_list_lock);
705cda97e   Bart Van Assche   blk-mq: Make it s...
2475
  	list_del_rcu(&q->tag_set_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2476
2477
2478
2479
2480
2481
  	if (list_is_singular(&set->tag_list)) {
  		/* just transitioned to unshared */
  		set->flags &= ~BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, false);
  	}
0d2602ca3   Jens Axboe   blk-mq: improve s...
2482
  	mutex_unlock(&set->tag_list_lock);
a347c7ad8   Roman Pen   blk-mq: reinit q-...
2483
  	INIT_LIST_HEAD(&q->tag_set_list);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2484
2485
2486
2487
2488
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
  				     struct request_queue *q)
  {
0d2602ca3   Jens Axboe   blk-mq: improve s...
2489
  	mutex_lock(&set->tag_list_lock);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2490

ff821d271   Jens Axboe   blk-mq: fixup som...
2491
2492
2493
2494
2495
  	/*
  	 * Check to see if we're transitioning to shared (from 1 to 2 queues).
  	 */
  	if (!list_empty(&set->tag_list) &&
  	    !(set->flags & BLK_MQ_F_TAG_SHARED)) {
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2496
2497
2498
2499
2500
2501
  		set->flags |= BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, true);
  	}
  	if (set->flags & BLK_MQ_F_TAG_SHARED)
  		queue_set_hctx_shared(q, true);
705cda97e   Bart Van Assche   blk-mq: Make it s...
2502
  	list_add_tail_rcu(&q->tag_set_list, &set->tag_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
2503

0d2602ca3   Jens Axboe   blk-mq: improve s...
2504
2505
  	mutex_unlock(&set->tag_list_lock);
  }
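  
  /*
   * Example (editorial sketch): the shared-tag transition handled above
   * happens automatically once a driver registers a second request queue on
   * the same tag set (e.g. several logical units behind one controller).
   * "struct my_ctrl2" is hypothetical and error unwinding is minimal.
   */
  struct my_ctrl2 {
  	struct blk_mq_tag_set	tag_set;
  	struct request_queue	*q0, *q1;
  };
  
  static int my_add_two_queues(struct my_ctrl2 *ctrl)
  {
  	ctrl->q0 = blk_mq_init_queue(&ctrl->tag_set);	/* still unshared */
  	if (IS_ERR(ctrl->q0))
  		return PTR_ERR(ctrl->q0);
  
  	/*
  	 * Adding the second queue makes blk_mq_add_queue_tag_set() flip both
  	 * queues to BLK_MQ_F_TAG_SHARED so tag use is throttled fairly.
  	 */
  	ctrl->q1 = blk_mq_init_queue(&ctrl->tag_set);
  	if (IS_ERR(ctrl->q1)) {
  		blk_cleanup_queue(ctrl->q0);
  		return PTR_ERR(ctrl->q1);
  	}
  	return 0;
  }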
1db4909e7   Ming Lei   blk-mq: not embed...
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
  /* All allocations will be freed in release handler of q->mq_kobj */
  static int blk_mq_alloc_ctxs(struct request_queue *q)
  {
  	struct blk_mq_ctxs *ctxs;
  	int cpu;
  
  	ctxs = kzalloc(sizeof(*ctxs), GFP_KERNEL);
  	if (!ctxs)
  		return -ENOMEM;
  
  	ctxs->queue_ctx = alloc_percpu(struct blk_mq_ctx);
  	if (!ctxs->queue_ctx)
  		goto fail;
  
  	for_each_possible_cpu(cpu) {
  		struct blk_mq_ctx *ctx = per_cpu_ptr(ctxs->queue_ctx, cpu);
  		ctx->ctxs = ctxs;
  	}
  
  	q->mq_kobj = &ctxs->kobj;
  	q->queue_ctx = ctxs->queue_ctx;
  
  	return 0;
   fail:
  	kfree(ctxs);
  	return -ENOMEM;
  }
e09aae7ed   Ming Lei   blk-mq: release m...
2533
2534
2535
2536
2537
2538
2539
2540
  /*
   * This is the actual release handler for mq, but we do it from the
   * request queue's release handler to avoid use-after-free issues;
   * q->mq_kobj shouldn't have been introduced, but we can't group the
   * ctx/kctx kobjects without it.
   */
  void blk_mq_release(struct request_queue *q)
  {
2f8f1336a   Ming Lei   blk-mq: always fr...
2541
2542
  	struct blk_mq_hw_ctx *hctx, *next;
  	int i;
e09aae7ed   Ming Lei   blk-mq: release m...
2543

2f8f1336a   Ming Lei   blk-mq: always fr...
2544
2545
2546
2547
2548
2549
  	queue_for_each_hw_ctx(q, hctx, i)
  		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
  
  	/* all hctx are in .unused_hctx_list now */
  	list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
  		list_del_init(&hctx->hctx_list);
6c8b232ef   Ming Lei   blk-mq: make life...
2550
  		kobject_put(&hctx->kobj);
c3b4afca7   Ming Lei   blk-mq: free hctx...
2551
  	}
e09aae7ed   Ming Lei   blk-mq: release m...
2552
2553
  
  	kfree(q->queue_hw_ctx);
7ea5fe31c   Ming Lei   blk-mq: make life...
2554
2555
2556
2557
2558
  	/*
  	 * release .mq_kobj and sw queue's kobject now because
  	 * both share lifetime with request queue.
  	 */
  	blk_mq_sysfs_deinit(q);
e09aae7ed   Ming Lei   blk-mq: release m...
2559
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
2560
  struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
320ae51fe   Jens Axboe   blk-mq: new multi...
2561
  {
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2562
  	struct request_queue *uninit_q, *q;
6d4696423   Christoph Hellwig   block: remove the...
2563
  	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2564
2565
  	if (!uninit_q)
  		return ERR_PTR(-ENOMEM);
737eb78e8   Damien Le Moal   block: Delay defa...
2566
2567
2568
2569
2570
  	/*
  	 * Initialize the queue without an elevator. device_add_disk() will do
  	 * the initialization.
  	 */
  	q = blk_mq_init_allocated_queue(set, uninit_q, false);
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2571
2572
2573
2574
2575
2576
  	if (IS_ERR(q))
  		blk_cleanup_queue(uninit_q);
  
  	return q;
  }
  EXPORT_SYMBOL(blk_mq_init_queue);
9316a9ed6   Jens Axboe   blk-mq: provide h...
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
  /*
   * Helper for setting up a queue with mq ops, given queue depth, and
   * the passed-in tag set flags.
   */
  struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
  					   const struct blk_mq_ops *ops,
  					   unsigned int queue_depth,
  					   unsigned int set_flags)
  {
  	struct request_queue *q;
  	int ret;
  
  	memset(set, 0, sizeof(*set));
  	set->ops = ops;
  	set->nr_hw_queues = 1;
b3c661b15   Jens Axboe   blk-mq: support m...
2592
  	set->nr_maps = 1;
9316a9ed6   Jens Axboe   blk-mq: provide h...
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
  	set->queue_depth = queue_depth;
  	set->numa_node = NUMA_NO_NODE;
  	set->flags = set_flags;
  
  	ret = blk_mq_alloc_tag_set(set);
  	if (ret)
  		return ERR_PTR(ret);
  
  	q = blk_mq_init_queue(set);
  	if (IS_ERR(q)) {
  		blk_mq_free_tag_set(set);
  		return q;
  	}
  
  	return q;
  }
  EXPORT_SYMBOL(blk_mq_init_sq_queue);
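  
  /*
   * Example (editorial sketch): typical use of blk_mq_init_sq_queue() by a
   * driver that only wants a single hardware queue.  The ops below complete
   * requests synchronously just to keep the sketch self-contained; the depth
   * of 128 and BLK_MQ_F_SHOULD_MERGE are arbitrary choices, and
   * "struct my_sq_dev" is hypothetical.
   */
  static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
  				const struct blk_mq_queue_data *bd)
  {
  	struct request *rq = bd->rq;
  
  	blk_mq_start_request(rq);
  	/* ... hand the request to the hardware here ... */
  	blk_mq_end_request(rq, BLK_STS_OK);
  	return BLK_STS_OK;
  }
  
  static const struct blk_mq_ops my_mq_ops = {
  	.queue_rq	= my_queue_rq,
  };
  
  struct my_sq_dev {
  	struct blk_mq_tag_set	tag_set;
  };
  
  static struct request_queue *my_create_queue(struct my_sq_dev *mydev)
  {
  	return blk_mq_init_sq_queue(&mydev->tag_set, &my_mq_ops, 128,
  				    BLK_MQ_F_SHOULD_MERGE);
  }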
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2610
2611
2612
2613
  static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
  		struct blk_mq_tag_set *set, struct request_queue *q,
  		int hctx_idx, int node)
  {
2f8f1336a   Ming Lei   blk-mq: always fr...
2614
  	struct blk_mq_hw_ctx *hctx = NULL, *tmp;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2615

2f8f1336a   Ming Lei   blk-mq: always fr...
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
  	/* reuse dead hctx first */
  	spin_lock(&q->unused_hctx_lock);
  	list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
  		if (tmp->numa_node == node) {
  			hctx = tmp;
  			break;
  		}
  	}
  	if (hctx)
  		list_del_init(&hctx->hctx_list);
  	spin_unlock(&q->unused_hctx_lock);
  
  	if (!hctx)
  		hctx = blk_mq_alloc_hctx(q, set, node);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2630
  	if (!hctx)
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2631
  		goto fail;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2632

7c6c5b7c9   Ming Lei   blk-mq: split blk...
2633
2634
  	if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
  		goto free_hctx;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2635
2636
  
  	return hctx;
7c6c5b7c9   Ming Lei   blk-mq: split blk...
2637
2638
2639
2640
2641
  
   free_hctx:
  	kobject_put(&hctx->kobj);
   fail:
  	return NULL;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2642
  }
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2643
2644
  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
  						struct request_queue *q)
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2645
  {
e01ad46d5   Jianchao Wang   blk-mq: fallback ...
2646
  	int i, j, end;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2647
  	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
2648

fb350e0ad   Ming Lei   blk-mq: fix race ...
2649
2650
  	/* protect against switching io scheduler  */
  	mutex_lock(&q->sysfs_lock);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2651
  	for (i = 0; i < set->nr_hw_queues; i++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2652
  		int node;
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2653
  		struct blk_mq_hw_ctx *hctx;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2654

7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2655
  		node = blk_mq_hw_queue_to_node(&set->map[HCTX_TYPE_DEFAULT], i);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2656
2657
2658
2659
2660
2661
2662
  		/*
  		 * If the hw queue has been mapped to another numa node,
  		 * we need to realloc the hctx. If allocation fails, fall back
  		 * to the previous one.
  		 */
  		if (hctxs[i] && (hctxs[i]->numa_node == node))
  			continue;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2663

34d11ffac   Jianchao Wang   blk-mq: realloc h...
2664
2665
  		hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
  		if (hctx) {
2f8f1336a   Ming Lei   blk-mq: always fr...
2666
  			if (hctxs[i])
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2667
  				blk_mq_exit_hctx(q, set, hctxs[i], i);
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2668
2669
2670
2671
2672
2673
2674
2675
2676
  			hctxs[i] = hctx;
  		} else {
  			if (hctxs[i])
  				pr_warn("Allocate new hctx on node %d fails,\
  						fallback to previous one on node %d\n",
  						node, hctxs[i]->numa_node);
  			else
  				break;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2677
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
2678
  	}
e01ad46d5   Jianchao Wang   blk-mq: fallback ...
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
  	/*
  	 * Increasing nr_hw_queues fails. Free the newly allocated
  	 * hctxs and keep the previous q->nr_hw_queues.
  	 */
  	if (i != set->nr_hw_queues) {
  		j = q->nr_hw_queues;
  		end = i;
  	} else {
  		j = i;
  		end = q->nr_hw_queues;
  		q->nr_hw_queues = set->nr_hw_queues;
  	}
34d11ffac   Jianchao Wang   blk-mq: realloc h...
2691

e01ad46d5   Jianchao Wang   blk-mq: fallback ...
2692
  	for (; j < end; j++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2693
2694
2695
  		struct blk_mq_hw_ctx *hctx = hctxs[j];
  
  		if (hctx) {
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2696
2697
  			if (hctx->tags)
  				blk_mq_free_map_and_requests(set, j);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2698
  			blk_mq_exit_hctx(q, set, hctx, j);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2699
  			hctxs[j] = NULL;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2700
2701
  		}
  	}
fb350e0ad   Ming Lei   blk-mq: fix race ...
2702
  	mutex_unlock(&q->sysfs_lock);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2703
  }
392546aed   Jens Axboe   blk-mq: separate ...
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
  /*
   * Maximum number of hardware queues we support. For single sets, we'll never
   * have more than the CPUs (software queues). For multiple sets, the tag_set
   * user may have set ->nr_hw_queues larger.
   */
  static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
  {
  	if (set->nr_maps == 1)
  		return nr_cpu_ids;
  
  	return max(set->nr_hw_queues, nr_cpu_ids);
  }
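  
  /*
   * Worked example (editorial, illustrative numbers): on a 16-CPU machine a
   * single-map tag set is capped at 16 hardware queues by the helper above,
   * since each CPU feeds at most one hctx per map.  A multi-map set such as
   * nr_maps = 3 with nr_hw_queues = 24 (say 16 default + 4 read + 4 poll
   * queues) keeps all 24, because the per-CPU limit then applies per map.
   */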
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2716
  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
737eb78e8   Damien Le Moal   block: Delay defa...
2717
2718
  						  struct request_queue *q,
  						  bool elevator_init)
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2719
  {
668416721   Ming Lei   blk-mq: mark requ...
2720
2721
  	/* mark the queue as mq asap */
  	q->mq_ops = set->ops;
34dbad5d2   Omar Sandoval   blk-stat: convert...
2722
  	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
720b8ccc4   Stephen Bates   blk-mq: Add a pol...
2723
2724
  					     blk_mq_poll_stats_bkt,
  					     BLK_MQ_POLL_STATS_BKTS, q);
34dbad5d2   Omar Sandoval   blk-stat: convert...
2725
2726
  	if (!q->poll_cb)
  		goto err_exit;
1db4909e7   Ming Lei   blk-mq: not embed...
2727
  	if (blk_mq_alloc_ctxs(q))
41de54c64   Jes Sorensen   blk-mq: Fix memor...
2728
  		goto err_poll;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2729

737f98cfe   Ming Lei   blk-mq: initializ...
2730
2731
  	/* init q->mq_kobj and sw queues' kobjects */
  	blk_mq_sysfs_init(q);
392546aed   Jens Axboe   blk-mq: separate ...
2732
2733
  	q->nr_queues = nr_hw_queues(set);
  	q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2734
2735
  						GFP_KERNEL, set->numa_node);
  	if (!q->queue_hw_ctx)
1db4909e7   Ming Lei   blk-mq: not embed...
2736
  		goto err_sys_init;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2737

2f8f1336a   Ming Lei   blk-mq: always fr...
2738
2739
  	INIT_LIST_HEAD(&q->unused_hctx_list);
  	spin_lock_init(&q->unused_hctx_lock);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2740
2741
2742
  	blk_mq_realloc_hw_ctxs(set, q);
  	if (!q->nr_hw_queues)
  		goto err_hctxs;
320ae51fe   Jens Axboe   blk-mq: new multi...
2743

287922eb0   Christoph Hellwig   block: defer time...
2744
  	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
e56f698bd   Ming Lei   blk-mq: set defau...
2745
  	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
320ae51fe   Jens Axboe   blk-mq: new multi...
2746

a8908939a   Jens Axboe   blk-mq: kill q->m...
2747
  	q->tag_set = set;
320ae51fe   Jens Axboe   blk-mq: new multi...
2748

94eddfbea   Jens Axboe   blk-mq: ensure th...
2749
  	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
cd19181bf   Ming Lei   blk-mq: enable IO...
2750
2751
  	if (set->nr_maps > HCTX_TYPE_POLL &&
  	    set->map[HCTX_TYPE_POLL].nr_queues)
6544d229b   Christoph Hellwig   block: enable pol...
2752
  		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
320ae51fe   Jens Axboe   blk-mq: new multi...
2753

1be036e94   Christoph Hellwig   blk-mq: initializ...
2754
  	q->sg_reserved_size = INT_MAX;
2849450ad   Mike Snitzer   blk-mq: introduce...
2755
  	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
2756
2757
  	INIT_LIST_HEAD(&q->requeue_list);
  	spin_lock_init(&q->requeue_lock);
254d259da   Christoph Hellwig   blk-mq: merge mq ...
2758
  	blk_queue_make_request(q, blk_mq_make_request);
07068d5b8   Jens Axboe   blk-mq: split mak...
2759

eba717682   Jens Axboe   blk-mq: initializ...
2760
2761
2762
2763
  	/*
  	 * Do this after blk_queue_make_request() overrides it...
  	 */
  	q->nr_requests = set->queue_depth;
64f1c21e8   Jens Axboe   blk-mq: make the ...
2764
2765
2766
  	/*
  	 * Default to classic polling
  	 */
29ece8b43   Yufen Yu   block: add BLK_MQ...
2767
  	q->poll_nsec = BLK_MQ_POLL_CLASSIC;
64f1c21e8   Jens Axboe   blk-mq: make the ...
2768

24d2f9030   Christoph Hellwig   blk-mq: split out...
2769
  	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2770
  	blk_mq_add_queue_tag_set(set, q);
4b855ad37   Christoph Hellwig   blk-mq: Create hc...
2771
  	blk_mq_map_swqueue(q);
4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
2772

737eb78e8   Damien Le Moal   block: Delay defa...
2773
2774
  	if (elevator_init)
  		elevator_init_mq(q);
d34849913   Jens Axboe   blk-mq-sched: all...
2775

320ae51fe   Jens Axboe   blk-mq: new multi...
2776
  	return q;
18741986a   Christoph Hellwig   blk-mq: rework fl...
2777

320ae51fe   Jens Axboe   blk-mq: new multi...
2778
  err_hctxs:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2779
  	kfree(q->queue_hw_ctx);
73d9c8d4c   zhengbin   blk-mq: Fix memor...
2780
  	q->nr_hw_queues = 0;
1db4909e7   Ming Lei   blk-mq: not embed...
2781
2782
  err_sys_init:
  	blk_mq_sysfs_deinit(q);
41de54c64   Jes Sorensen   blk-mq: Fix memor...
2783
2784
2785
  err_poll:
  	blk_stat_free_callback(q->poll_cb);
  	q->poll_cb = NULL;
c7de57263   Ming Lin   blk-mq: clear q->...
2786
2787
  err_exit:
  	q->mq_ops = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2788
2789
  	return ERR_PTR(-ENOMEM);
  }
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2790
  EXPORT_SYMBOL(blk_mq_init_allocated_queue);
320ae51fe   Jens Axboe   blk-mq: new multi...
2791

c7e2d94b3   Ming Lei   blk-mq: free hw q...
2792
2793
  /* tags can _not_ be used after returning from blk_mq_exit_queue */
  void blk_mq_exit_queue(struct request_queue *q)
320ae51fe   Jens Axboe   blk-mq: new multi...
2794
  {
624dbe475   Ming Lei   blk-mq: avoid cod...
2795
  	struct blk_mq_tag_set	*set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
2796

0d2602ca3   Jens Axboe   blk-mq: improve s...
2797
  	blk_mq_del_queue_tag_set(q);
624dbe475   Ming Lei   blk-mq: avoid cod...
2798
  	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
2799
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2800

a51644054   Jens Axboe   blk-mq: scale dep...
2801
2802
2803
  static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	int i;
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2804
2805
  	for (i = 0; i < set->nr_hw_queues; i++)
  		if (!__blk_mq_alloc_rq_map(set, i))
a51644054   Jens Axboe   blk-mq: scale dep...
2806
  			goto out_unwind;
a51644054   Jens Axboe   blk-mq: scale dep...
2807
2808
2809
2810
2811
  
  	return 0;
  
  out_unwind:
  	while (--i >= 0)
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2812
  		blk_mq_free_rq_map(set->tags[i]);
a51644054   Jens Axboe   blk-mq: scale dep...
2813

a51644054   Jens Axboe   blk-mq: scale dep...
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
  	return -ENOMEM;
  }
  
  /*
   * Allocate the request maps associated with this tag_set. Note that this
   * may reduce the depth asked for, if memory is tight. set->queue_depth
   * will be updated to reflect the allocated depth.
   */
  static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	unsigned int depth;
  	int err;
  
  	depth = set->queue_depth;
  	do {
  		err = __blk_mq_alloc_rq_maps(set);
  		if (!err)
  			break;
  
  		set->queue_depth >>= 1;
  		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
  			err = -ENOMEM;
  			break;
  		}
  	} while (set->queue_depth);
  
  	if (!set->queue_depth || err) {
  		pr_err("blk-mq: failed to allocate request map
  ");
  		return -ENOMEM;
  	}
  
  	if (depth != set->queue_depth)
  		pr_info("blk-mq: reduced tag depth (%u -> %u)
  ",
  						depth, set->queue_depth);
  
  	return 0;
  }
ebe8bddb6   Omar Sandoval   blk-mq: remap que...
2853
2854
  static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  {
d4083258d   Bart Van Assche   blk-mq: Keep set-...
2855
2856
2857
2858
2859
2860
2861
  	/*
  	 * blk_mq_map_queues() and multiple .map_queues() implementations
  	 * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the
  	 * number of hardware queues.
  	 */
  	if (set->nr_maps == 1)
  		set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
593887024   Ming Lei   blk-mq: re-build ...
2862
  	if (set->ops->map_queues && !is_kdump_kernel()) {
b3c661b15   Jens Axboe   blk-mq: support m...
2863
  		int i;
7d4901a90   Ming Lei   blk-mq: avoid to ...
2864
2865
2866
2867
2868
2869
2870
  		/*
  		 * transport .map_queues is usually done in the following
  		 * way:
  		 *
  		 * for (queue = 0; queue < set->nr_hw_queues; queue++) {
  		 * 	mask = get_cpu_mask(queue)
  		 * 	for_each_cpu(cpu, mask)
b3c661b15   Jens Axboe   blk-mq: support m...
2871
  		 * 		set->map[x].mq_map[cpu] = queue;
7d4901a90   Ming Lei   blk-mq: avoid to ...
2872
2873
2874
2875
2876
2877
  		 * }
  		 *
  		 * When we need to remap, the table has to be cleared to
  		 * kill stale mappings, since one CPU may not be mapped
  		 * to any hw queue.
  		 */
b3c661b15   Jens Axboe   blk-mq: support m...
2878
2879
  		for (i = 0; i < set->nr_maps; i++)
  			blk_mq_clear_mq_map(&set->map[i]);
7d4901a90   Ming Lei   blk-mq: avoid to ...
2880

ebe8bddb6   Omar Sandoval   blk-mq: remap que...
2881
  		return set->ops->map_queues(set);
b3c661b15   Jens Axboe   blk-mq: support m...
2882
2883
  	} else {
  		BUG_ON(set->nr_maps > 1);
7d76f8562   Dongli Zhang   blk-mq: use HCTX_...
2884
  		return blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
b3c661b15   Jens Axboe   blk-mq: support m...
2885
  	}
ebe8bddb6   Omar Sandoval   blk-mq: remap que...
2886
  }
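  
  /*
   * Example (editorial sketch): a transport ->map_queues() implementation of
   * the shape described in the comment above.  The core has already cleared
   * the tables with blk_mq_clear_mq_map(); a real driver would map each CPU
   * to the hardware queue of its interrupt vector rather than using this
   * plain modulo spread.
   */
  static int my_map_queues(struct blk_mq_tag_set *set)
  {
  	struct blk_mq_queue_map *qmap = &set->map[HCTX_TYPE_DEFAULT];
  	unsigned int cpu;
  
  	for_each_possible_cpu(cpu)
  		qmap->mq_map[cpu] = cpu % set->nr_hw_queues;
  
  	return 0;
  }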
a4391c646   Jens Axboe   blk-mq: bump max ...
2887
2888
2889
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
c018c84fd   Minwoo Im   blk-mq: fix typo ...
2890
   * requested depth down, if it's too large. In that case, the adjusted
a4391c646   Jens Axboe   blk-mq: bump max ...
2891
2892
   * value will be stored in set->queue_depth.
   */
24d2f9030   Christoph Hellwig   blk-mq: split out...
2893
2894
  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2895
  	int i, ret;
da695ba23   Christoph Hellwig   blk-mq: allow the...
2896

205fb5f5b   Bart Van Assche   blk-mq: add blk_m...
2897
  	BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2898
2899
  	if (!set->nr_hw_queues)
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2900
  	if (!set->queue_depth)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2901
2902
2903
  		return -EINVAL;
  	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
  		return -EINVAL;
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
2904
  	if (!set->ops->queue_rq)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2905
  		return -EINVAL;
de1482974   Ming Lei   blk-mq: introduce...
2906
2907
  	if (!set->ops->get_budget ^ !set->ops->put_budget)
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2908
2909
2910
2911
2912
2913
  	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
  		pr_info("blk-mq: reduced tag depth to %u
  ",
  			BLK_MQ_MAX_DEPTH);
  		set->queue_depth = BLK_MQ_MAX_DEPTH;
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
2914

b3c661b15   Jens Axboe   blk-mq: support m...
2915
2916
2917
2918
  	if (!set->nr_maps)
  		set->nr_maps = 1;
  	else if (set->nr_maps > HCTX_MAX_TYPES)
  		return -EINVAL;
6637fadf2   Shaohua Li   blk-mq: move the ...
2919
2920
2921
2922
2923
2924
2925
  	/*
  	 * If a crashdump is active, then we are potentially in a very
  	 * memory constrained environment. Limit us to 1 queue and
  	 * 64 tags to prevent using too much memory.
  	 */
  	if (is_kdump_kernel()) {
  		set->nr_hw_queues = 1;
593887024   Ming Lei   blk-mq: re-build ...
2926
  		set->nr_maps = 1;
6637fadf2   Shaohua Li   blk-mq: move the ...
2927
2928
  		set->queue_depth = min(64U, set->queue_depth);
  	}
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2929
  	/*
392546aed   Jens Axboe   blk-mq: separate ...
2930
2931
  	 * There is no use for more h/w queues than cpus if we just have
  	 * a single map
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2932
  	 */
392546aed   Jens Axboe   blk-mq: separate ...
2933
  	if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2934
  		set->nr_hw_queues = nr_cpu_ids;
6637fadf2   Shaohua Li   blk-mq: move the ...
2935

392546aed   Jens Axboe   blk-mq: separate ...
2936
  	set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
24d2f9030   Christoph Hellwig   blk-mq: split out...
2937
2938
  				 GFP_KERNEL, set->numa_node);
  	if (!set->tags)
a51644054   Jens Axboe   blk-mq: scale dep...
2939
  		return -ENOMEM;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2940

da695ba23   Christoph Hellwig   blk-mq: allow the...
2941
  	ret = -ENOMEM;
b3c661b15   Jens Axboe   blk-mq: support m...
2942
2943
  	for (i = 0; i < set->nr_maps; i++) {
  		set->map[i].mq_map = kcalloc_node(nr_cpu_ids,
07b35eb5a   Ming Lei   blk-mq: fix alloc...
2944
  						  sizeof(set->map[i].mq_map[0]),
b3c661b15   Jens Axboe   blk-mq: support m...
2945
2946
2947
  						  GFP_KERNEL, set->numa_node);
  		if (!set->map[i].mq_map)
  			goto out_free_mq_map;
593887024   Ming Lei   blk-mq: re-build ...
2948
  		set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
b3c661b15   Jens Axboe   blk-mq: support m...
2949
  	}
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2950

ebe8bddb6   Omar Sandoval   blk-mq: remap que...
2951
  	ret = blk_mq_update_queue_map(set);
da695ba23   Christoph Hellwig   blk-mq: allow the...
2952
2953
2954
2955
2956
  	if (ret)
  		goto out_free_mq_map;
  
  	ret = blk_mq_alloc_rq_maps(set);
  	if (ret)
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2957
  		goto out_free_mq_map;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2958

0d2602ca3   Jens Axboe   blk-mq: improve s...
2959
2960
  	mutex_init(&set->tag_list_lock);
  	INIT_LIST_HEAD(&set->tag_list);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2961
  	return 0;
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2962
2963
  
  out_free_mq_map:
b3c661b15   Jens Axboe   blk-mq: support m...
2964
2965
2966
2967
  	for (i = 0; i < set->nr_maps; i++) {
  		kfree(set->map[i].mq_map);
  		set->map[i].mq_map = NULL;
  	}
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2968
2969
  	kfree(set->tags);
  	set->tags = NULL;
da695ba23   Christoph Hellwig   blk-mq: allow the...
2970
  	return ret;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2971
2972
2973
2974
2975
  }
  EXPORT_SYMBOL(blk_mq_alloc_tag_set);
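  
  /*
   * Example (editorial sketch): the usual driver-side lifecycle around
   * blk_mq_alloc_tag_set().  The "my_*" names, the depth of 64 and the
   * cmd_size are hypothetical; my_mq_ops must implement at least .queue_rq
   * (see the single-queue sketch after blk_mq_init_sq_queue() above).
   */
  struct my_ctrl {
  	struct blk_mq_tag_set	tag_set;
  	unsigned int		nr_irq_vectors;
  };
  
  static struct request_queue *my_setup_queue(struct my_ctrl *ctrl)
  {
  	struct blk_mq_tag_set *set = &ctrl->tag_set;
  	struct request_queue *q;
  
  	memset(set, 0, sizeof(*set));
  	set->ops = &my_mq_ops;
  	set->nr_hw_queues = ctrl->nr_irq_vectors;
  	set->queue_depth = 64;
  	set->cmd_size = sizeof(struct my_cmd);	/* per-request pdu */
  	set->numa_node = NUMA_NO_NODE;
  	set->flags = BLK_MQ_F_SHOULD_MERGE;
  
  	if (blk_mq_alloc_tag_set(set))
  		return NULL;
  
  	q = blk_mq_init_queue(set);
  	if (IS_ERR(q)) {
  		blk_mq_free_tag_set(set);
  		return NULL;
  	}
  	return q;
  }
  
  static void my_teardown_queue(struct my_ctrl *ctrl, struct request_queue *q)
  {
  	blk_cleanup_queue(q);			/* before freeing the tags */
  	blk_mq_free_tag_set(&ctrl->tag_set);
  }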
  
  void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
  {
b3c661b15   Jens Axboe   blk-mq: support m...
2976
  	int i, j;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2977

392546aed   Jens Axboe   blk-mq: separate ...
2978
  	for (i = 0; i < nr_hw_queues(set); i++)
cc71a6f43   Jens Axboe   blk-mq: abstract ...
2979
  		blk_mq_free_map_and_requests(set, i);
484b4061e   Jens Axboe   blk-mq: save memo...
2980

b3c661b15   Jens Axboe   blk-mq: support m...
2981
2982
2983
2984
  	for (j = 0; j < set->nr_maps; j++) {
  		kfree(set->map[j].mq_map);
  		set->map[j].mq_map = NULL;
  	}
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2985

981bd189f   Ming Lei   blk-mq: fix leak ...
2986
  	kfree(set->tags);
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2987
  	set->tags = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2988
2989
  }
  EXPORT_SYMBOL(blk_mq_free_tag_set);
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2990
2991
2992
2993
2994
  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	struct blk_mq_hw_ctx *hctx;
  	int i, ret;
bd166ef18   Jens Axboe   blk-mq-sched: add...
2995
  	if (!set)
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2996
  		return -EINVAL;
e5fa81408   Aleksei Zakharov   block: avoid sett...
2997
2998
  	if (q->nr_requests == nr)
  		return 0;
70f36b600   Jens Axboe   blk-mq: allow res...
2999
  	blk_mq_freeze_queue(q);
24f5a90f0   Ming Lei   blk-mq: quiesce q...
3000
  	blk_mq_quiesce_queue(q);
70f36b600   Jens Axboe   blk-mq: allow res...
3001

e3a2b3f93   Jens Axboe   blk-mq: allow cha...
3002
3003
  	ret = 0;
  	queue_for_each_hw_ctx(q, hctx, i) {
e9137d4b9   Keith Busch   blk-mq: Fix NULL ...
3004
3005
  		if (!hctx->tags)
  			continue;
bd166ef18   Jens Axboe   blk-mq-sched: add...
3006
3007
3008
3009
  		/*
  		 * If we're using an MQ scheduler, just update the scheduler
  		 * queue depth. This is similar to what the old code would do.
  		 */
70f36b600   Jens Axboe   blk-mq: allow res...
3010
  		if (!hctx->sched_tags) {
c2e82a234   weiping zhang   blk-mq: fix nr_re...
3011
  			ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
70f36b600   Jens Axboe   blk-mq: allow res...
3012
3013
3014
3015
3016
  							false);
  		} else {
  			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
  							nr, true);
  		}
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
3017
3018
  		if (ret)
  			break;
77f1e0a52   Jens Axboe   bfq: update inter...
3019
3020
  		if (q->elevator && q->elevator->type->ops.depth_updated)
  			q->elevator->type->ops.depth_updated(hctx);
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
3021
3022
3023
3024
  	}
  
  	if (!ret)
  		q->nr_requests = nr;
24f5a90f0   Ming Lei   blk-mq: quiesce q...
3025
  	blk_mq_unquiesce_queue(q);
70f36b600   Jens Axboe   blk-mq: allow res...
3026
  	blk_mq_unfreeze_queue(q);
70f36b600   Jens Axboe   blk-mq: allow res...
3027

e3a2b3f93   Jens Axboe   blk-mq: allow cha...
3028
3029
  	return ret;
  }
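  
  /*
   * Editorial note: blk_mq_update_nr_requests() is not called by drivers
   * directly; it backs the per-device sysfs knob, e.g.
   *
   *	echo 64 > /sys/block/<dev>/queue/nr_requests
   *
   * which resizes the (scheduler) tag depth with the queue frozen and
   * quiesced as shown above.
   */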
d48ece209   Jianchao Wang   blk-mq: init hctx...
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
  /*
   * request_queue and elevator_type pair.
   * It is just used by __blk_mq_update_nr_hw_queues to cache
   * the elevator_type associated with a request_queue.
   */
  struct blk_mq_qe_pair {
  	struct list_head node;
  	struct request_queue *q;
  	struct elevator_type *type;
  };
  
/*
 * Cache the elevator_type in the qe pair list and switch the
 * io scheduler to 'none'.
 */
  static bool blk_mq_elv_switch_none(struct list_head *head,
  		struct request_queue *q)
  {
  	struct blk_mq_qe_pair *qe;
  
  	if (!q->elevator)
  		return true;
  
  	qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
  	if (!qe)
  		return false;
  
  	INIT_LIST_HEAD(&qe->node);
  	qe->q = q;
  	qe->type = q->elevator->type;
  	list_add(&qe->node, head);
  
  	mutex_lock(&q->sysfs_lock);
	/*
	 * After elevator_switch_mq, the previous elevator_queue will be
	 * released by elevator_release. The reference to the io scheduler
	 * module obtained by elevator_get will also be dropped. So take an
	 * extra reference to the io scheduler module here to prevent it
	 * from being removed while it is still needed.
	 */
  	__module_get(qe->type->elevator_owner);
  	elevator_switch_mq(q, NULL);
  	mutex_unlock(&q->sysfs_lock);
  
  	return true;
  }
  
  static void blk_mq_elv_switch_back(struct list_head *head,
  		struct request_queue *q)
  {
  	struct blk_mq_qe_pair *qe;
  	struct elevator_type *t = NULL;
  
  	list_for_each_entry(qe, head, node)
  		if (qe->q == q) {
  			t = qe->type;
  			break;
  		}
  
  	if (!t)
  		return;
  
  	list_del(&qe->node);
  	kfree(qe);
  
  	mutex_lock(&q->sysfs_lock);
  	elevator_switch_mq(q, t);
  	mutex_unlock(&q->sysfs_lock);
  }
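
/*
 * The two helpers above are used as a strict pair by
 * __blk_mq_update_nr_hw_queues() below: every queue in the tag set is
 * switched to the 'none' scheduler (with its previous elevator_type cached
 * on a caller-provided list) before the hardware queue count is changed,
 * and each queue is switched back from that same list once the new
 * mappings are in place.
 */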
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
							int nr_hw_queues)
{
	struct request_queue *q;
	LIST_HEAD(head);
	int prev_nr_hw_queues;

	lockdep_assert_held(&set->tag_list_lock);

	if (set->nr_maps == 1 && nr_hw_queues > nr_cpu_ids)
		nr_hw_queues = nr_cpu_ids;
	if (nr_hw_queues < 1)
		return;
	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
		return;

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue(q);
	/*
	 * Sync with blk_mq_queue_tag_busy_iter.
	 */
	synchronize_rcu();
	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
	 * updating the new sw to hw queue mappings.
	 */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		if (!blk_mq_elv_switch_none(&head, q))
			goto switch_back;

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_debugfs_unregister_hctxs(q);
		blk_mq_sysfs_unregister(q);
	}

	prev_nr_hw_queues = set->nr_hw_queues;
	set->nr_hw_queues = nr_hw_queues;
fallback:
	blk_mq_update_queue_map(set);
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_realloc_hw_ctxs(set, q);
		if (q->nr_hw_queues != set->nr_hw_queues) {
			pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
					nr_hw_queues, prev_nr_hw_queues);
			set->nr_hw_queues = prev_nr_hw_queues;
			blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
			goto fallback;
		}
		blk_mq_map_swqueue(q);
	}

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_sysfs_register(q);
		blk_mq_debugfs_register_hctxs(q);
	}

switch_back:
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_elv_switch_back(&head, q);

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_unfreeze_queue(q);
}

void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
{
	mutex_lock(&set->tag_list_lock);
	__blk_mq_update_nr_hw_queues(set, nr_hw_queues);
	mutex_unlock(&set->tag_list_lock);
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
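
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * after a controller reset leaves a driver with a different number of
 * usable interrupt vectors, it can resize its shared tag set. The my_dev
 * structure and my_dev_resize_queues() helper are made up for the example;
 * blk_mq_update_nr_hw_queues() is the real entry point exported above.
 */
struct my_dev {
	struct blk_mq_tag_set tag_set;
	unsigned int nr_vecs;
};

static void my_dev_resize_queues(struct my_dev *dev)
{
	/*
	 * Takes set->tag_list_lock, freezes every queue sharing the tag set,
	 * switches elevators to 'none', updates the queue mappings (falling
	 * back to the previous count on failure), then restores everything.
	 */
	blk_mq_update_nr_hw_queues(&dev->tag_set, dev->nr_vecs);
}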

/* Enable polling stats and return whether they were already enabled. */
static bool blk_poll_stats_enable(struct request_queue *q)
{
	if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
	    blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
		return true;
	blk_stat_add_callback(q, q->poll_cb);
	return false;
}

static void blk_mq_poll_stats_start(struct request_queue *q)
{
	/*
	 * We don't arm the callback if polling stats are not enabled or the
	 * callback is already active.
	 */
	if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
	    blk_stat_is_active(q->poll_cb))
		return;

	blk_stat_activate_msecs(q->poll_cb, 100);
}

static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
{
	struct request_queue *q = cb->data;
	int bucket;

	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
		if (cb->stat[bucket].nr_samples)
			q->poll_stat[bucket] = cb->stat[bucket];
	}
}

static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
				       struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	unsigned long ret = 0;
	int bucket;

	/*
	 * If stats collection isn't on, don't sleep but turn it on for
	 * future users
	 */
	if (!blk_poll_stats_enable(q))
		return 0;

	/*
	 * As an optimistic guess, use half of the mean service time
	 * for this type of request. We can (and should) make this smarter.
	 * For instance, if the completion latencies are tight, we can
	 * get closer than just half the mean. This is especially
	 * important on devices where the completion latencies are longer
	 * than ~10 usec. We do use the stats for the relevant IO size
	 * if available which does lead to better estimates.
	 */
	bucket = blk_mq_poll_stats_bkt(rq);
	if (bucket < 0)
		return ret;

	if (q->poll_stat[bucket].nr_samples)
		ret = (q->poll_stat[bucket].mean + 1) / 2;

	return ret;
}
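
/*
 * Worked example of the estimate above (illustrative numbers, not taken
 * from any real device): if the stats bucket matching this request reports
 * a mean completion time of 9000 ns, blk_mq_poll_nsecs() returns
 * (9000 + 1) / 2 = 4500 ns, so hybrid polling sleeps for roughly half the
 * expected completion time before falling back to busy polling.
 */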

static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
				     struct blk_mq_hw_ctx *hctx,
				     struct request *rq)
{
	struct hrtimer_sleeper hs;
	enum hrtimer_mode mode;
	unsigned int nsecs;
	ktime_t kt;

	if (rq->rq_flags & RQF_MQ_POLL_SLEPT)
		return false;

	/*
	 * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
	 *
	 *  0:	use half of prev avg
	 * >0:	use this specific value
	 */
	if (q->poll_nsec > 0)
		nsecs = q->poll_nsec;
	else
		nsecs = blk_mq_poll_nsecs(q, hctx, rq);

	if (!nsecs)
		return false;

	rq->rq_flags |= RQF_MQ_POLL_SLEPT;

	/*
	 * This will be replaced with the stats tracking code, using
	 * 'avg_completion_time / 2' as the pre-sleep target.
	 */
	kt = nsecs;

	mode = HRTIMER_MODE_REL;
	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
	hrtimer_set_expires(&hs.timer, kt);

	do {
		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
			break;
		set_current_state(TASK_UNINTERRUPTIBLE);
		hrtimer_sleeper_start_expires(&hs, mode);
		if (hs.task)
			io_schedule();
		hrtimer_cancel(&hs.timer);
		mode = HRTIMER_MODE_ABS;
	} while (hs.task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);
	destroy_hrtimer_on_stack(&hs.timer);
	return true;
}

static bool blk_mq_poll_hybrid(struct request_queue *q,
			       struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
{
	struct request *rq;

	if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
		return false;

	if (!blk_qc_t_is_internal(cookie))
		rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
	else {
		rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
		/*
		 * With scheduling, if the request has completed, we'll
		 * get a NULL return here, as we clear the sched tag when
		 * that happens. The request still remains valid, like always,
		 * so we should be safe with just the NULL check.
		 */
		if (!rq)
			return false;
	}

	return blk_mq_poll_hybrid_sleep(q, hctx, rq);
}

/**
 * blk_poll - poll for IO completions
 * @q:  the queue
 * @cookie: cookie passed back at IO submission time
 * @spin: whether to spin for completions
 *
 * Description:
 *    Poll for completions on the passed in queue. Returns number of
 *    completed entries found. If @spin is true, then blk_poll will continue
 *    looping until at least one completion is found, unless the task is
 *    otherwise marked running (or we need to reschedule).
 */
int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
{
	struct blk_mq_hw_ctx *hctx;
	long state;

	if (!blk_qc_t_valid(cookie) ||
	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		return 0;

	if (current->plug)
		blk_flush_plug_list(current->plug, false);

	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];

	/*
	 * If we sleep, have the caller restart the poll loop to reset
	 * the state. Like for the other success return cases, the
	 * caller is responsible for checking if the IO completed. If
	 * the IO isn't complete, we'll get called again and will go
	 * straight to the busy poll loop.
	 */
	if (blk_mq_poll_hybrid(q, hctx, cookie))
		return 1;

	hctx->poll_considered++;

	state = current->state;
	do {
		int ret;

		hctx->poll_invoked++;

		ret = q->mq_ops->poll(hctx);
		if (ret > 0) {
			hctx->poll_success++;
			__set_current_state(TASK_RUNNING);
			return ret;
		}

		if (signal_pending_state(state, current))
			__set_current_state(TASK_RUNNING);

		if (current->state == TASK_RUNNING)
			return 1;
		if (ret < 0 || !spin)
			break;
		cpu_relax();
	} while (!need_resched());

	__set_current_state(TASK_RUNNING);
	return 0;
}
EXPORT_SYMBOL_GPL(blk_poll);
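
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the
 * typical polling pattern used by synchronous, direct-I/O style callers.
 * The bio is submitted with REQ_HIPRI so it is routed to a polled queue,
 * the returned blk_qc_t cookie is handed back to blk_poll(), and the caller
 * keeps polling until its own completion condition (here the made-up 'done'
 * flag) is satisfied. Assumes submit_bio() in this kernel version returns
 * the blk_qc_t cookie.
 */
static void example_poll_for_completion(struct block_device *bdev,
					struct bio *bio, bool *done)
{
	struct request_queue *q = bdev_get_queue(bdev);
	blk_qc_t cookie;

	bio->bi_opf |= REQ_HIPRI;
	cookie = submit_bio(bio);

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(*done))
			break;

		/* spin == true: keep looping until a completion is found */
		if (blk_poll(q, cookie, true) <= 0)
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);
}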

unsigned int blk_mq_rq_cpu(struct request *rq)
{
	return rq->mq_ctx->cpu;
}
EXPORT_SYMBOL(blk_mq_rq_cpu);

static int __init blk_mq_init(void)
{
	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
				blk_mq_hctx_notify_dead);
	return 0;
}
subsys_initcall(blk_mq_init);