block/blk-mq.c
  /*
   * Block multiqueue core code
   *
   * Copyright (C) 2013-2014 Jens Axboe
   * Copyright (C) 2013-2014 Christoph Hellwig
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/kmemleak.h>
  #include <linux/mm.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>
  #include <linux/smp.h>
  #include <linux/llist.h>
  #include <linux/list_sort.h>
  #include <linux/cpu.h>
  #include <linux/cache.h>
  #include <linux/sched/sysctl.h>
  #include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/prefetch.h>
  
  #include <trace/events/block.h>
  
  #include <linux/blk-mq.h>
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-tag.h"
  
  static DEFINE_MUTEX(all_q_mutex);
  static LIST_HEAD(all_q_list);
/*
 * Check if any of the ctx's have pending work in this hardware queue
 */
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
	return sbitmap_any_bit_set(&hctx->ctx_map);
}

/*
 * Mark this ctx as having pending work in this hardware queue
 */
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
				     struct blk_mq_ctx *ctx)
{
	if (!sbitmap_test_bit(&hctx->ctx_map, ctx->index_hw))
		sbitmap_set_bit(&hctx->ctx_map, ctx->index_hw);
}

static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
				      struct blk_mq_ctx *ctx)
{
	sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
}
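
/*
 * Start freezing the queue: bump the freeze depth and, on the first
 * freeze, kill q_usage_counter so no new requests can enter, then run
 * the hardware queues so outstanding work can drain.
 */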
void blk_mq_freeze_queue_start(struct request_queue *q)
{
	int freeze_depth;

	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
	if (freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
		blk_mq_run_hw_queues(q, false);
	}
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);

static void blk_mq_freeze_queue_wait(struct request_queue *q)
{
	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
  /*
   * Guarantee no request is in use, so we can change any data structure of
   * the queue afterward.
   */
void blk_freeze_queue(struct request_queue *q)
{
	/*
	 * In the !blk_mq case we are only calling this to kill the
	 * q_usage_counter, otherwise this increases the freeze depth
	 * and waits for it to return to zero.  For this reason there is
	 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
	 * exported to drivers as the only user for unfreeze is blk_mq.
	 */
	blk_mq_freeze_queue_start(q);
	blk_mq_freeze_queue_wait(q);
}
  
  void blk_mq_freeze_queue(struct request_queue *q)
  {
  	/*
  	 * ...just an alias to keep freeze and unfreeze actions balanced
  	 * in the blk_mq_* namespace
  	 */
  	blk_freeze_queue(q);
  }
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

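/*
 * Drop one freeze reference; when the depth reaches zero, revive
 * q_usage_counter and wake up anyone waiting on the freeze queue.
 */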
void blk_mq_unfreeze_queue(struct request_queue *q)
{
	int freeze_depth;

	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
	WARN_ON_ONCE(freeze_depth < 0);
	if (!freeze_depth) {
		percpu_ref_reinit(&q->q_usage_counter);
		wake_up_all(&q->mq_freeze_wq);
	}
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

  void blk_mq_wake_waiters(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		if (blk_mq_hw_queue_mapped(hctx))
  			blk_mq_tag_wakeup_all(hctx->tags, true);
  
  	/*
  	 * If we are called because the queue has now been marked as
  	 * dying, we need to ensure that processes currently waiting on
  	 * the queue are notified as well.
  	 */
  	wake_up_all(&q->mq_freeze_wq);
}

  bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  {
  	return blk_mq_has_free_tags(hctx->tags);
  }
  EXPORT_SYMBOL(blk_mq_can_queue);
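
/*
 * Initialize a newly allocated request: reset its per-request fields and
 * record the submitting software queue (ctx) and operation flags.
 */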
static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
			       struct request *rq, int op,
			       unsigned int op_flags)
{
	if (blk_queue_io_stat(q))
		op_flags |= REQ_IO_STAT;

	INIT_LIST_HEAD(&rq->queuelist);
	/* csd/requeue_work/fifo_time is initialized before use */
	rq->q = q;
	rq->mq_ctx = ctx;
	req_set_op_attrs(rq, op, op_flags);
	/* do not touch atomic flags, it needs atomic ops against the timer */
	rq->cpu = -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->rq_disk = NULL;
	rq->part = NULL;
	rq->start_time = jiffies;
#ifdef CONFIG_BLK_CGROUP
	rq->rl = NULL;
	set_start_time_ns(rq);
	rq->io_start_time_ns = 0;
#endif
	rq->nr_phys_segments = 0;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
	rq->nr_integrity_segments = 0;
#endif
	rq->special = NULL;
	/* tag was already set */
	rq->errors = 0;

	rq->cmd = rq->__cmd;

	rq->extra_len = 0;
	rq->sense_len = 0;
	rq->resid_len = 0;
	rq->sense = NULL;

	INIT_LIST_HEAD(&rq->timeout_list);
	rq->timeout = 0;

	rq->end_io = NULL;
	rq->end_io_data = NULL;
	rq->next_rq = NULL;

	ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
}
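
/*
 * Grab a tag from the hardware context in @data and initialize the
 * matching preallocated request, or return NULL if no tag is available.
 */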
static struct request *
__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
{
	struct request *rq;
	unsigned int tag;

	tag = blk_mq_get_tag(data);
	if (tag != BLK_MQ_TAG_FAIL) {
		rq = data->hctx->tags->rqs[tag];

		if (blk_mq_tag_busy(data->hctx)) {
			rq->cmd_flags = REQ_MQ_INFLIGHT;
			atomic_inc(&data->hctx->nr_active);
		}

		rq->tag = tag;
		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
		return rq;
	}

	return NULL;
}

struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
		unsigned int flags)
{
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
	struct request *rq;
	struct blk_mq_alloc_data alloc_data;
	int ret;

	ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
	if (ret)
		return ERR_PTR(ret);

	ctx = blk_mq_get_ctx(q);
	hctx = blk_mq_map_queue(q, ctx->cpu);
	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
	blk_mq_put_ctx(ctx);

	if (!rq) {
		blk_queue_exit(q);
		return ERR_PTR(-EWOULDBLOCK);
	}

	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
}
EXPORT_SYMBOL(blk_mq_alloc_request);

  struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
  		unsigned int flags, unsigned int hctx_idx)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  	struct request *rq;
  	struct blk_mq_alloc_data alloc_data;
  	int ret;
  
  	/*
  	 * If the tag allocator sleeps we could get an allocation for a
  	 * different hardware context.  No need to complicate the low level
  	 * allocator for this for the rare use case of a command tied to
  	 * a specific queue.
  	 */
  	if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
  		return ERR_PTR(-EINVAL);
  
  	if (hctx_idx >= q->nr_hw_queues)
  		return ERR_PTR(-EIO);
  
  	ret = blk_queue_enter(q, true);
  	if (ret)
  		return ERR_PTR(ret);
	/*
	 * Check if the hardware context is actually mapped to anything.
	 * If not tell the caller that it should skip this queue.
	 */
	hctx = q->queue_hw_ctx[hctx_idx];
	if (!blk_mq_hw_queue_mapped(hctx)) {
		ret = -EXDEV;
		goto out_queue_exit;
	}
	ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));

	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
	if (!rq) {
		ret = -EWOULDBLOCK;
		goto out_queue_exit;
	}

	return rq;

out_queue_exit:
	blk_queue_exit(q);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);

static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_ctx *ctx, struct request *rq)
{
	const int tag = rq->tag;
	struct request_queue *q = rq->q;

	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
		atomic_dec(&hctx->nr_active);
	rq->cmd_flags = 0;

	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
	blk_mq_put_tag(hctx, ctx, tag);
	blk_queue_exit(q);
}

void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	ctx->rq_completed[rq_is_sync(rq)]++;
	__blk_mq_free_request(hctx, ctx, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);

void blk_mq_free_request(struct request *rq)
{
	blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);

inline void __blk_mq_end_request(struct request *rq, int error)
{
	blk_account_io_done(rq);

	if (rq->end_io) {
		rq->end_io(rq, error);
	} else {
		if (unlikely(blk_bidi_rq(rq)))
			blk_mq_free_request(rq->next_rq);
		blk_mq_free_request(rq);
	}
}
EXPORT_SYMBOL(__blk_mq_end_request);

void blk_mq_end_request(struct request *rq, int error)
{
	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
		BUG();
	__blk_mq_end_request(rq, error);
}
EXPORT_SYMBOL(blk_mq_end_request);

static void __blk_mq_complete_request_remote(void *data)
{
	struct request *rq = data;

	rq->q->softirq_done_fn(rq);
}

static void blk_mq_ipi_complete_request(struct request *rq)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	bool shared = false;
	int cpu;

	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
		rq->q->softirq_done_fn(rq);
		return;
	}

	cpu = get_cpu();
	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
		shared = cpus_share_cache(cpu, ctx->cpu);

	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
		rq->csd.func = __blk_mq_complete_request_remote;
		rq->csd.info = rq;
		rq->csd.flags = 0;
		smp_call_function_single_async(ctx->cpu, &rq->csd);
	} else {
		rq->q->softirq_done_fn(rq);
	}
	put_cpu();
}

static void __blk_mq_complete_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	if (!q->softirq_done_fn)
		blk_mq_end_request(rq, rq->errors);
	else
		blk_mq_ipi_complete_request(rq);
}
  /**
   * blk_mq_complete_request - end I/O on a request
   * @rq:		the request being processed
   *
   * Description:
   *	Ends all I/O on a request. It does not handle partial completions.
 *	The actual completion happens out-of-order, through an IPI handler.
   **/
void blk_mq_complete_request(struct request *rq, int error)
{
	struct request_queue *q = rq->q;

	if (unlikely(blk_should_fake_timeout(q)))
		return;
	if (!blk_mark_rq_complete(rq)) {
		rq->errors = error;
		__blk_mq_complete_request(rq);
	}
}
EXPORT_SYMBOL(blk_mq_complete_request);

int blk_mq_request_started(struct request *rq)
{
	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
}
EXPORT_SYMBOL_GPL(blk_mq_request_started);

void blk_mq_start_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_issue(q, rq);
	rq->resid_len = blk_rq_bytes(rq);
	if (unlikely(blk_bidi_rq(rq)))
		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);

	blk_add_timer(rq);

	/*
	 * Ensure that ->deadline is visible before we set the started
	 * flag and clear the completed flag.
	 */
	smp_mb__before_atomic();

	/*
	 * Mark us as started and clear complete. Complete might have been
	 * set if requeue raced with timeout, which then marked it as
	 * complete. So be sure to clear complete again when we start
	 * the request, otherwise we'll ignore the completion event.
	 */
	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
		set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
	if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
		clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);

	if (q->dma_drain_size && blk_rq_bytes(rq)) {
		/*
		 * Make sure space for the drain appears.  We know we can do
		 * this because max_hw_segments has been adjusted to be one
		 * fewer than the device can handle.
		 */
		rq->nr_phys_segments++;
	}
}
EXPORT_SYMBOL(blk_mq_start_request);

static void __blk_mq_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_requeue(q, rq);

	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
		if (q->dma_drain_size && blk_rq_bytes(rq))
			rq->nr_phys_segments--;
	}
}

void blk_mq_requeue_request(struct request *rq)
{
	__blk_mq_requeue_request(rq);

	BUG_ON(blk_queued_rq(rq));
	blk_mq_add_to_requeue_list(rq, true);
}
EXPORT_SYMBOL(blk_mq_requeue_request);
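
/*
 * Work handler for the requeue list: splice everything off
 * q->requeue_list and re-insert it (REQ_SOFTBARRIER requests go to the
 * head), then restart the hardware queues.
 */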
static void blk_mq_requeue_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, requeue_work.work);
  	LIST_HEAD(rq_list);
  	struct request *rq, *next;
  	unsigned long flags;
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	list_splice_init(&q->requeue_list, &rq_list);
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  
  	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
  		if (!(rq->cmd_flags & REQ_SOFTBARRIER))
  			continue;
  
  		rq->cmd_flags &= ~REQ_SOFTBARRIER;
  		list_del_init(&rq->queuelist);
  		blk_mq_insert_request(rq, true, false, false);
  	}
  
  	while (!list_empty(&rq_list)) {
  		rq = list_entry(rq_list.next, struct request, queuelist);
  		list_del_init(&rq->queuelist);
  		blk_mq_insert_request(rq, false, false, false);
  	}
  	/*
  	 * Use the start variant of queue running here, so that running
  	 * the requeue work will kick stopped queues.
  	 */
  	blk_mq_start_hw_queues(q);
  }
  
  void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
  {
  	struct request_queue *q = rq->q;
  	unsigned long flags;
  
  	/*
  	 * We abuse this flag that is otherwise used by the I/O scheduler to
	 * request head insertion from the workqueue.
  	 */
  	BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	if (at_head) {
  		rq->cmd_flags |= REQ_SOFTBARRIER;
  		list_add(&rq->queuelist, &q->requeue_list);
  	} else {
  		list_add_tail(&rq->queuelist, &q->requeue_list);
  	}
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  }
  EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
void blk_mq_cancel_requeue_work(struct request_queue *q)
{
	cancel_delayed_work_sync(&q->requeue_work);
}
EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);

void blk_mq_kick_requeue_list(struct request_queue *q)
{
	kblockd_schedule_delayed_work(&q->requeue_work, 0);
}
EXPORT_SYMBOL(blk_mq_kick_requeue_list);

void blk_mq_delay_kick_requeue_list(struct request_queue *q,
				    unsigned long msecs)
{
	kblockd_schedule_delayed_work(&q->requeue_work,
				      msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
  void blk_mq_abort_requeue_list(struct request_queue *q)
  {
  	unsigned long flags;
  	LIST_HEAD(rq_list);
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	list_splice_init(&q->requeue_list, &rq_list);
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  
  	while (!list_empty(&rq_list)) {
  		struct request *rq;
  
  		rq = list_first_entry(&rq_list, struct request, queuelist);
  		list_del_init(&rq->queuelist);
  		rq->errors = -EIO;
  		blk_mq_end_request(rq, rq->errors);
  	}
  }
  EXPORT_SYMBOL(blk_mq_abort_requeue_list);

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	if (tag < tags->nr_tags) {
		prefetch(tags->rqs[tag]);
		return tags->rqs[tag];
	}

	return NULL;
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);

struct blk_mq_timeout_data {
	unsigned long next;
	unsigned int next_set;
};

void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
	struct blk_mq_ops *ops = req->q->mq_ops;
	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
  
  	/*
  	 * We know that complete is set at this point. If STARTED isn't set
  	 * anymore, then the request isn't active and the "timeout" should
  	 * just be ignored. This can happen due to the bitflag ordering.
  	 * Timeout first checks if STARTED is set, and if it is, assumes
  	 * the request is active. But if we race with completion, then
	 * both flags will get cleared. So check here again, and ignore
  	 * a timeout event with a request that isn't active.
  	 */
	if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))
		return;

	if (ops->timeout)
		ret = ops->timeout(req, reserved);
  
  	switch (ret) {
  	case BLK_EH_HANDLED:
  		__blk_mq_complete_request(req);
  		break;
  	case BLK_EH_RESET_TIMER:
  		blk_add_timer(req);
  		blk_clear_rq_complete(req);
  		break;
  	case BLK_EH_NOT_HANDLED:
  		break;
  	default:
		printk(KERN_ERR "block: bad eh return: %d\n", ret);
  		break;
  	}
}

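/*
 * Per-request callback for the timeout iteration: end never-started
 * requests on a dying queue, time out requests past their deadline, and
 * track the nearest deadline still outstanding.
 */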
  static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
  		struct request *rq, void *priv, bool reserved)
  {
  	struct blk_mq_timeout_data *data = priv;

	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
		/*
		 * If a request wasn't started before the queue was
		 * marked dying, kill it here or it'll go unnoticed.
		 */
		if (unlikely(blk_queue_dying(rq->q))) {
			rq->errors = -EIO;
			blk_mq_end_request(rq, rq->errors);
		}
		return;
	}

	if (time_after_eq(jiffies, rq->deadline)) {
		if (!blk_mark_rq_complete(rq))
			blk_mq_rq_timed_out(rq, reserved);
	} else if (!data->next_set || time_after(data->next, rq->deadline)) {
		data->next = rq->deadline;
		data->next_set = 1;
	}
}

static void blk_mq_timeout_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, timeout_work);
	struct blk_mq_timeout_data data = {
		.next		= 0,
		.next_set	= 0,
	};
	int i;

  	/* A deadlock might occur if a request is stuck requiring a
  	 * timeout at the same time a queue freeze is waiting
  	 * completion, since the timeout code would not be able to
  	 * acquire the queue reference here.
  	 *
  	 * That's why we don't use blk_queue_enter here; instead, we use
  	 * percpu_ref_tryget directly, because we need to be able to
  	 * obtain a reference even in the short window between the queue
  	 * starting to freeze, by dropping the first reference in
  	 * blk_mq_freeze_queue_start, and the moment the last request is
  	 * consumed, marked by the instant q_usage_counter reaches
  	 * zero.
  	 */
  	if (!percpu_ref_tryget(&q->q_usage_counter))
		return;
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);

	if (data.next_set) {
		data.next = blk_rq_timeout(round_jiffies_up(data.next));
		mod_timer(&q->timeout, data.next);
	} else {
		struct blk_mq_hw_ctx *hctx;

		queue_for_each_hw_ctx(q, hctx, i) {
			/* the hctx may be unmapped, so check it here */
			if (blk_mq_hw_queue_mapped(hctx))
				blk_mq_tag_idle(hctx);
		}
	}
	blk_queue_exit(q);
  }
  
  /*
   * Reverse check our software queue for entries that we could potentially
   * merge with. Currently includes a hand-wavy stop count of 8, to not spend
   * too much time checking for merges.
   */
  static bool blk_mq_attempt_merge(struct request_queue *q,
  				 struct blk_mq_ctx *ctx, struct bio *bio)
  {
  	struct request *rq;
  	int checked = 8;
  
  	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
  		int el_ret;
  
  		if (!checked--)
  			break;
  
  		if (!blk_rq_merge_ok(rq, bio))
  			continue;
  
  		el_ret = blk_try_merge(rq, bio);
  		if (el_ret == ELEVATOR_BACK_MERGE) {
  			if (bio_attempt_back_merge(q, rq, bio)) {
  				ctx->rq_merged++;
  				return true;
  			}
  			break;
  		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
  			if (bio_attempt_front_merge(q, rq, bio)) {
  				ctx->rq_merged++;
  				return true;
  			}
  			break;
  		}
  	}
  
  	return false;
  }
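
/*
 * Context and callback for sbitmap_for_each_set() below: every set bit
 * names a software queue with pending requests, whose rq_list is spliced
 * onto the dispatch list being built.
 */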
  struct flush_busy_ctx_data {
  	struct blk_mq_hw_ctx *hctx;
  	struct list_head *list;
  };
  
  static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
  {
  	struct flush_busy_ctx_data *flush_data = data;
  	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
  	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
  
  	sbitmap_clear_bit(sb, bitnr);
  	spin_lock(&ctx->lock);
  	list_splice_tail_init(&ctx->rq_list, flush_data->list);
  	spin_unlock(&ctx->lock);
  	return true;
  }
/*
 * Process software queues that have been marked busy, splicing them
 * to the for-dispatch list.
 */
static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
{
	struct flush_busy_ctx_data data = {
		.hctx = hctx,
		.list = list,
	};

	sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
}

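/*
 * Bucket a dispatch batch size for the hctx->dispatched[] statistics:
 * 0 means nothing was queued, otherwise ilog2(queued) + 1 capped at
 * BLK_MQ_MAX_DISPATCH_ORDER - 1.
 */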
  static inline unsigned int queued_to_index(unsigned int queued)
  {
  	if (!queued)
  		return 0;

	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
}

/*
   * Run this hardware queue, pulling any software queues mapped to it in.
   * Note that this function currently has various problems around ordering
   * of IO. In particular, we'd like FIFO behaviour on handling existing
   * items on the hctx->dispatch list. Ignore that for now.
   */
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
	struct request *rq;
	LIST_HEAD(rq_list);
	LIST_HEAD(driver_list);
	struct list_head *dptr;
	int queued;

	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
		return;

	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
		cpu_online(hctx->next_cpu));

	hctx->run++;

	/*
	 * Touch any software queue that has pending entries.
	 */
	flush_busy_ctxs(hctx, &rq_list);
  
  	/*
  	 * If we have previous entries on our dispatch list, grab them
  	 * and stuff them at the front for more fair dispatch.
  	 */
  	if (!list_empty_careful(&hctx->dispatch)) {
  		spin_lock(&hctx->lock);
  		if (!list_empty(&hctx->dispatch))
  			list_splice_init(&hctx->dispatch, &rq_list);
  		spin_unlock(&hctx->lock);
  	}
  
  	/*
  	 * Start off with dptr being NULL, so we start the first request
  	 * immediately, even if we have more pending.
  	 */
  	dptr = NULL;
  
  	/*
	 * Now process all the entries, sending them to the driver.
	 */
	queued = 0;
	while (!list_empty(&rq_list)) {
		struct blk_mq_queue_data bd;
		int ret;

		rq = list_first_entry(&rq_list, struct request, queuelist);
		list_del_init(&rq->queuelist);

		bd.rq = rq;
		bd.list = dptr;
		bd.last = list_empty(&rq_list);

		ret = q->mq_ops->queue_rq(hctx, &bd);
		switch (ret) {
		case BLK_MQ_RQ_QUEUE_OK:
			queued++;
			break;
		case BLK_MQ_RQ_QUEUE_BUSY:
			list_add(&rq->queuelist, &rq_list);
			__blk_mq_requeue_request(rq);
			break;
		default:
			pr_err("blk-mq: bad return on queue: %d\n", ret);
		case BLK_MQ_RQ_QUEUE_ERROR:
			rq->errors = -EIO;
			blk_mq_end_request(rq, rq->errors);
			break;
		}

		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
			break;
  
  		/*
  		 * We've done the first request. If we have more than 1
  		 * left in the list, set dptr to defer issue.
  		 */
  		if (!dptr && rq_list.next != rq_list.prev)
  			dptr = &driver_list;
320ae51fe   Jens Axboe   blk-mq: new multi...
824
  	}
703fd1c0f   Jens Axboe   blk-mq: account h...
825
  	hctx->dispatched[queued_to_index(queued)]++;
320ae51fe   Jens Axboe   blk-mq: new multi...
826
827
828
829
830
831
832
833
834
  
  	/*
  	 * Any items that need requeuing? Stuff them into hctx->dispatch,
  	 * that is where we will continue on next queue run.
  	 */
  	if (!list_empty(&rq_list)) {
  		spin_lock(&hctx->lock);
  		list_splice(&rq_list, &hctx->dispatch);
  		spin_unlock(&hctx->lock);
  		/*
		 * the queue is expected to be stopped with BLK_MQ_RQ_QUEUE_BUSY, but
  		 * it's possible the queue is stopped and restarted again
  		 * before this. Queue restart will dispatch requests. And since
  		 * requests in rq_list aren't added into hctx->dispatch yet,
  		 * the requests in rq_list might get lost.
  		 *
  		 * blk_mq_run_hw_queue() already checks the STOPPED bit
  		 **/
  		blk_mq_run_hw_queue(hctx, true);
	}
}
  /*
   * It'd be great if the workqueue API had a way to pass
   * in a mask and had some smarts for more clever placement.
   * For now we just round-robin here, switching for every
   * BLK_MQ_CPU_WORK_BATCH queued items.
   */
  static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
  {
	if (hctx->queue->nr_hw_queues == 1)
		return WORK_CPU_UNBOUND;

	if (--hctx->next_cpu_batch <= 0) {
		int cpu = hctx->next_cpu, next_cpu;

		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
		if (next_cpu >= nr_cpu_ids)
			next_cpu = cpumask_first(hctx->cpumask);

		hctx->next_cpu = next_cpu;
		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;

		return cpu;
	}

	return hctx->next_cpu;
}
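
/*
 * Kick a hardware queue: run it directly when we are already on one of
 * its mapped CPUs (and the driver does not need a blocking context),
 * otherwise defer to kblockd on a CPU picked by blk_mq_hctx_next_cpu().
 */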
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
	    !blk_mq_hw_queue_mapped(hctx)))
		return;
	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
		int cpu = get_cpu();
		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
			__blk_mq_run_hw_queue(hctx);
			put_cpu();
			return;
		}

		put_cpu();
	}

	kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
}

void blk_mq_run_hw_queues(struct request_queue *q, bool async)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if ((!blk_mq_hctx_has_pending(hctx) &&
  		    list_empty_careful(&hctx->dispatch)) ||
		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
			continue;
		blk_mq_run_hw_queue(hctx, async);
	}
}
EXPORT_SYMBOL(blk_mq_run_hw_queues);

void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
	cancel_work(&hctx->run_work);
	cancel_delayed_work(&hctx->delay_work);
	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queue);

  void blk_mq_stop_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_stop_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queues);

void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);

	blk_mq_run_hw_queue(hctx, false);
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
  void blk_mq_start_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queues);

void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
  			continue;
  
  		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
		blk_mq_run_hw_queue(hctx, async);
	}
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);

static void blk_mq_run_work_fn(struct work_struct *work)
{
	struct blk_mq_hw_ctx *hctx;

	hctx = container_of(work, struct blk_mq_hw_ctx, run_work);

	__blk_mq_run_hw_queue(hctx);
}
  static void blk_mq_delay_work_fn(struct work_struct *work)
  {
  	struct blk_mq_hw_ctx *hctx;
  
  	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
  
  	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
  		__blk_mq_run_hw_queue(hctx);
  }
  
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
  {
	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
		return;

	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
			&hctx->delay_work, msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_queue);

static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
					    struct request *rq,
					    bool at_head)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	trace_block_rq_insert(hctx->queue, rq);

	if (at_head)
		list_add(&rq->queuelist, &ctx->rq_list);
	else
		list_add_tail(&rq->queuelist, &ctx->rq_list);
}

static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
				    struct request *rq, bool at_head)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	__blk_mq_insert_req_list(hctx, rq, at_head);
	blk_mq_hctx_mark_pending(hctx, ctx);
}

void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
			   bool async)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	spin_lock(&ctx->lock);
	__blk_mq_insert_request(hctx, rq, at_head);
	spin_unlock(&ctx->lock);

  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, async);
  }
  
  static void blk_mq_insert_requests(struct request_queue *q,
  				     struct blk_mq_ctx *ctx,
  				     struct list_head *list,
  				     int depth,
  				     bool from_schedule)
  
  {
	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);

	trace_block_unplug(q, depth, !from_schedule);

  	/*
  	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
  	 * offline now
  	 */
  	spin_lock(&ctx->lock);
  	while (!list_empty(list)) {
  		struct request *rq;
  
  		rq = list_first_entry(list, struct request, queuelist);
		BUG_ON(rq->mq_ctx != ctx);
		list_del_init(&rq->queuelist);
		__blk_mq_insert_req_list(hctx, rq, false);
	}
	blk_mq_hctx_mark_pending(hctx, ctx);
	spin_unlock(&ctx->lock);
  	blk_mq_run_hw_queue(hctx, from_schedule);
  }
  
  static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
  {
  	struct request *rqa = container_of(a, struct request, queuelist);
  	struct request *rqb = container_of(b, struct request, queuelist);
  
  	return !(rqa->mq_ctx < rqb->mq_ctx ||
  		 (rqa->mq_ctx == rqb->mq_ctx &&
  		  blk_rq_pos(rqa) < blk_rq_pos(rqb)));
  }
  
  void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  {
  	struct blk_mq_ctx *this_ctx;
  	struct request_queue *this_q;
  	struct request *rq;
  	LIST_HEAD(list);
  	LIST_HEAD(ctx_list);
  	unsigned int depth;
  
  	list_splice_init(&plug->mq_list, &list);
  
  	list_sort(NULL, &list, plug_ctx_cmp);
  
  	this_q = NULL;
  	this_ctx = NULL;
  	depth = 0;
  
  	while (!list_empty(&list)) {
  		rq = list_entry_rq(list.next);
  		list_del_init(&rq->queuelist);
  		BUG_ON(!rq->q);
  		if (rq->mq_ctx != this_ctx) {
  			if (this_ctx) {
  				blk_mq_insert_requests(this_q, this_ctx,
  							&ctx_list, depth,
  							from_schedule);
  			}
  
  			this_ctx = rq->mq_ctx;
  			this_q = rq->q;
  			depth = 0;
  		}
  
  		depth++;
  		list_add_tail(&rq->queuelist, &ctx_list);
  	}
  
  	/*
  	 * If 'this_ctx' is set, we know we have entries to complete
  	 * on 'ctx_list'. Do those.
  	 */
  	if (this_ctx) {
  		blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
  				       from_schedule);
  	}
  }
  
  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
  {
  	init_request_from_bio(rq, bio);

	blk_account_io_start(rq, 1);
}

static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
{
	return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
		!blk_queue_nomerges(hctx->queue);
}

  static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
  					 struct blk_mq_ctx *ctx,
  					 struct request *rq, struct bio *bio)
{
	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
		blk_mq_bio_to_request(rq, bio);
		spin_lock(&ctx->lock);
insert_rq:
		__blk_mq_insert_request(hctx, rq, false);
		spin_unlock(&ctx->lock);
		return false;
	} else {
		struct request_queue *q = hctx->queue;

		spin_lock(&ctx->lock);
		if (!blk_mq_attempt_merge(q, ctx, bio)) {
			blk_mq_bio_to_request(rq, bio);
			goto insert_rq;
		}

		spin_unlock(&ctx->lock);
		__blk_mq_free_request(hctx, ctx, rq);
		return true;
	}
}

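/*
 * blk_mq_map_request() below picks the software and hardware queue for
 * the submitting CPU, allocates a request for @bio and hands the chosen
 * (hctx, ctx) pair back through this structure.
 */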
  struct blk_map_ctx {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  };
  
  static struct request *blk_mq_map_request(struct request_queue *q,
  					  struct bio *bio,
  					  struct blk_map_ctx *data)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  	struct request *rq;
cc6e3b109   Mike Christie   block: prepare mq...
1144
1145
  	int op = bio_data_dir(bio);
  	int op_flags = 0;
cb96a42cc   Ming Lei   blk-mq: fix sched...
1146
  	struct blk_mq_alloc_data alloc_data;
320ae51fe   Jens Axboe   blk-mq: new multi...
1147

3ef28e83a   Dan Williams   block: generic re...
1148
  	blk_queue_enter_live(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1149
  	ctx = blk_mq_get_ctx(q);
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
1150
  	hctx = blk_mq_map_queue(q, ctx->cpu);
320ae51fe   Jens Axboe   blk-mq: new multi...
1151

1eff9d322   Jens Axboe   block: rename bio...
1152
  	if (rw_is_sync(bio_op(bio), bio->bi_opf))
cc6e3b109   Mike Christie   block: prepare mq...
1153
  		op_flags |= REQ_SYNC;
07068d5b8   Jens Axboe   blk-mq: split mak...
1154

cc6e3b109   Mike Christie   block: prepare mq...
1155
  	trace_block_getrq(q, bio, op);
63581af3f   Christoph Hellwig   blk-mq: remove no...
1156
  	blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
cc6e3b109   Mike Christie   block: prepare mq...
1157
  	rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
320ae51fe   Jens Axboe   blk-mq: new multi...
1158
1159
  
  	hctx->queued++;
07068d5b8   Jens Axboe   blk-mq: split mak...
1160
1161
1162
1163
  	data->hctx = hctx;
  	data->ctx = ctx;
  	return rq;
  }
7b371636f   Jens Axboe   blk-mq: return ta...
1164
  static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
f984df1f0   Shaohua Li   blk-mq: do limite...
1165
1166
1167
  {
  	int ret;
  	struct request_queue *q = rq->q;
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
1168
  	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
f984df1f0   Shaohua Li   blk-mq: do limite...
1169
1170
1171
1172
1173
  	struct blk_mq_queue_data bd = {
  		.rq = rq,
  		.list = NULL,
  		.last = 1
  	};
7b371636f   Jens Axboe   blk-mq: return ta...
1174
  	blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num);
f984df1f0   Shaohua Li   blk-mq: do limite...
1175
1176
1177
1178
1179
1180
1181
  
  	/*
	 * For an OK return we are done. For an error, kill the request. For
	 * any other return (busy), just add it to our list as we previously
	 * would have done.
  	 */
  	ret = q->mq_ops->queue_rq(hctx, &bd);
7b371636f   Jens Axboe   blk-mq: return ta...
1182
1183
  	if (ret == BLK_MQ_RQ_QUEUE_OK) {
  		*cookie = new_cookie;
f984df1f0   Shaohua Li   blk-mq: do limite...
1184
  		return 0;
7b371636f   Jens Axboe   blk-mq: return ta...
1185
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1186

7b371636f   Jens Axboe   blk-mq: return ta...
1187
1188
1189
1190
1191
1192
1193
  	__blk_mq_requeue_request(rq);
  
  	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
  		*cookie = BLK_QC_T_NONE;
  		rq->errors = -EIO;
  		blk_mq_end_request(rq, rq->errors);
  		return 0;
f984df1f0   Shaohua Li   blk-mq: do limite...
1194
  	}
7b371636f   Jens Axboe   blk-mq: return ta...
1195
1196
  
  	return -1;
f984df1f0   Shaohua Li   blk-mq: do limite...
1197
  }
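
/*
 * Illustrative sketch, not part of blk-mq: a minimal driver-side
 * ->queue_rq() handler showing the return-value contract that
 * blk_mq_direct_issue_request() above relies on. The name
 * example_queue_rq and the complete-immediately behaviour are
 * assumptions for illustration; a real driver would hand the request
 * to hardware and complete it from its interrupt path.
 */
static int __maybe_unused example_queue_rq(struct blk_mq_hw_ctx *hctx,
					   const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	/* Tell the core that the driver now owns the request */
	blk_mq_start_request(rq);

	/*
	 * A driver that is temporarily out of resources would return
	 * BLK_MQ_RQ_QUEUE_BUSY here, which makes the core requeue the
	 * request instead of failing it.
	 */

	/* Pretend the I/O finished successfully right away */
	blk_mq_end_request(rq, 0);
	return BLK_MQ_RQ_QUEUE_OK;
}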
07068d5b8   Jens Axboe   blk-mq: split mak...
1198
1199
1200
1201
1202
  /*
 * Multiple hardware queue variant. This only does limited per-process
 * plugging, and will attempt to bypass the hctx queueing if we can go
 * straight to hardware for SYNC IO.
   */
dece16353   Jens Axboe   block: change ->m...
1203
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
1204
  {
1eff9d322   Jens Axboe   block: rename bio...
1205
1206
  	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
  	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
07068d5b8   Jens Axboe   blk-mq: split mak...
1207
1208
  	struct blk_map_ctx data;
  	struct request *rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
1209
1210
  	unsigned int request_count = 0;
  	struct blk_plug *plug;
5b3f341f0   Shaohua Li   blk-mq: make plug...
1211
  	struct request *same_queue_rq = NULL;
7b371636f   Jens Axboe   blk-mq: return ta...
1212
  	blk_qc_t cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1213
1214
1215
1216
  
  	blk_queue_bounce(q, &bio);
  
  	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
4246a0b63   Christoph Hellwig   block: add a bi_e...
1217
  		bio_io_error(bio);
dece16353   Jens Axboe   block: change ->m...
1218
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1219
  	}
54efd50bf   Kent Overstreet   block: make gener...
1220
  	blk_queue_split(q, &bio, q->bio_split);
87c279e61   Omar Sandoval   blk-mq: really fi...
1221
1222
1223
  	if (!is_flush_fua && !blk_queue_nomerges(q) &&
  	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
  		return BLK_QC_T_NONE;
f984df1f0   Shaohua Li   blk-mq: do limite...
1224

07068d5b8   Jens Axboe   blk-mq: split mak...
1225
1226
  	rq = blk_mq_map_request(q, bio, &data);
  	if (unlikely(!rq))
dece16353   Jens Axboe   block: change ->m...
1227
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1228

7b371636f   Jens Axboe   blk-mq: return ta...
1229
  	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
07068d5b8   Jens Axboe   blk-mq: split mak...
1230
1231
1232
1233
1234
1235
  
  	if (unlikely(is_flush_fua)) {
  		blk_mq_bio_to_request(rq, bio);
  		blk_insert_flush(rq);
  		goto run_queue;
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1236
  	plug = current->plug;
e167dfb53   Jens Axboe   blk-mq: add BLK_M...
1237
1238
1239
1240
1241
  	/*
	 * If the driver supports deferred issue based on 'last', then
  	 * queue it up like normal since we can potentially save some
  	 * CPU this way.
  	 */
f984df1f0   Shaohua Li   blk-mq: do limite...
1242
1243
1244
  	if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
  	    !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
  		struct request *old_rq = NULL;
07068d5b8   Jens Axboe   blk-mq: split mak...
1245
1246
  
  		blk_mq_bio_to_request(rq, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
1247
1248
  
  		/*
b094f89ca   Jens Axboe   blk-mq: fix calli...
1249
		 * We do limited plugging. If the bio can be merged, do that.
f984df1f0   Shaohua Li   blk-mq: do limite...
1250
1251
  		 * Otherwise the existing request in the plug list will be
  		 * issued. So the plug list will have one request at most
07068d5b8   Jens Axboe   blk-mq: split mak...
1252
  		 */
f984df1f0   Shaohua Li   blk-mq: do limite...
1253
  		if (plug) {
5b3f341f0   Shaohua Li   blk-mq: make plug...
1254
1255
  			/*
  			 * The plug list might get flushed before this. If that
b094f89ca   Jens Axboe   blk-mq: fix calli...
1256
1257
1258
			 * happens, same_queue_rq is invalid and the plug list
			 * is empty.
  			 */
5b3f341f0   Shaohua Li   blk-mq: make plug...
1259
1260
  			if (same_queue_rq && !list_empty(&plug->mq_list)) {
  				old_rq = same_queue_rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
1261
  				list_del_init(&old_rq->queuelist);
07068d5b8   Jens Axboe   blk-mq: split mak...
1262
  			}
f984df1f0   Shaohua Li   blk-mq: do limite...
1263
1264
1265
1266
1267
  			list_add_tail(&rq->queuelist, &plug->mq_list);
  		} else /* is_sync */
  			old_rq = rq;
  		blk_mq_put_ctx(data.ctx);
  		if (!old_rq)
7b371636f   Jens Axboe   blk-mq: return ta...
1268
1269
1270
  			goto done;
  		if (!blk_mq_direct_issue_request(old_rq, &cookie))
  			goto done;
f984df1f0   Shaohua Li   blk-mq: do limite...
1271
  		blk_mq_insert_request(old_rq, false, true, true);
7b371636f   Jens Axboe   blk-mq: return ta...
1272
  		goto done;
07068d5b8   Jens Axboe   blk-mq: split mak...
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
  	}
  
  	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
  		/*
  		 * For a SYNC request, send it to the hardware immediately. For
  		 * an ASYNC request, just ensure that we run it later on. The
  		 * latter allows for merging opportunities and more efficient
  		 * dispatching.
  		 */
  run_queue:
  		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1285
  	blk_mq_put_ctx(data.ctx);
7b371636f   Jens Axboe   blk-mq: return ta...
1286
1287
  done:
  	return cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1288
1289
1290
1291
1292
1293
  }
  
  /*
   * Single hardware queue variant. This will attempt to use any per-process
   * plug for merging and IO deferral.
   */
dece16353   Jens Axboe   block: change ->m...
1294
  static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
1295
  {
1eff9d322   Jens Axboe   block: rename bio...
1296
1297
  	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
  	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1298
1299
  	struct blk_plug *plug;
  	unsigned int request_count = 0;
07068d5b8   Jens Axboe   blk-mq: split mak...
1300
1301
  	struct blk_map_ctx data;
  	struct request *rq;
7b371636f   Jens Axboe   blk-mq: return ta...
1302
  	blk_qc_t cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1303

07068d5b8   Jens Axboe   blk-mq: split mak...
1304
1305
1306
  	blk_queue_bounce(q, &bio);
  
  	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
4246a0b63   Christoph Hellwig   block: add a bi_e...
1307
  		bio_io_error(bio);
dece16353   Jens Axboe   block: change ->m...
1308
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1309
  	}
54efd50bf   Kent Overstreet   block: make gener...
1310
  	blk_queue_split(q, &bio, q->bio_split);
87c279e61   Omar Sandoval   blk-mq: really fi...
1311
1312
1313
1314
1315
  	if (!is_flush_fua && !blk_queue_nomerges(q)) {
  		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
  			return BLK_QC_T_NONE;
  	} else
  		request_count = blk_plug_queued_count(q);
07068d5b8   Jens Axboe   blk-mq: split mak...
1316
1317
  
  	rq = blk_mq_map_request(q, bio, &data);
ff87bcec1   Jens Axboe   blk-mq: handle NU...
1318
  	if (unlikely(!rq))
dece16353   Jens Axboe   block: change ->m...
1319
  		return BLK_QC_T_NONE;
320ae51fe   Jens Axboe   blk-mq: new multi...
1320

7b371636f   Jens Axboe   blk-mq: return ta...
1321
  	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
320ae51fe   Jens Axboe   blk-mq: new multi...
1322
1323
1324
  
  	if (unlikely(is_flush_fua)) {
  		blk_mq_bio_to_request(rq, bio);
320ae51fe   Jens Axboe   blk-mq: new multi...
1325
1326
1327
1328
1329
1330
1331
1332
1333
  		blk_insert_flush(rq);
  		goto run_queue;
  	}
  
  	/*
	 * If a task plug exists, use it: since this is completely lockless,
	 * we can utilize it to temporarily store requests until the task is
	 * either done or scheduled away.
  	 */
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1334
1335
1336
  	plug = current->plug;
  	if (plug) {
  		blk_mq_bio_to_request(rq, bio);
676d06077   Ming Lei   blk-mq: fix for t...
1337
  		if (!request_count)
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1338
  			trace_block_plug(q);
b094f89ca   Jens Axboe   blk-mq: fix calli...
1339
1340
1341
1342
  
  		blk_mq_put_ctx(data.ctx);
  
  		if (request_count >= BLK_MAX_REQUEST_COUNT) {
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1343
1344
  			blk_flush_plug_list(plug, false);
  			trace_block_plug(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1345
  		}
b094f89ca   Jens Axboe   blk-mq: fix calli...
1346

e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1347
  		list_add_tail(&rq->queuelist, &plug->mq_list);
7b371636f   Jens Axboe   blk-mq: return ta...
1348
  		return cookie;
320ae51fe   Jens Axboe   blk-mq: new multi...
1349
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1350
1351
1352
1353
1354
1355
1356
1357
1358
  	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
  		/*
  		 * For a SYNC request, send it to the hardware immediately. For
  		 * an ASYNC request, just ensure that we run it later on. The
  		 * latter allows for merging opportunities and more efficient
  		 * dispatching.
  		 */
  run_queue:
  		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
320ae51fe   Jens Axboe   blk-mq: new multi...
1359
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1360
  	blk_mq_put_ctx(data.ctx);
7b371636f   Jens Axboe   blk-mq: return ta...
1361
  	return cookie;
320ae51fe   Jens Axboe   blk-mq: new multi...
1362
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
1363
1364
  static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
  		struct blk_mq_tags *tags, unsigned int hctx_idx)
95363efde   Jens Axboe   blk-mq: allow blk...
1365
  {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1366
  	struct page *page;
320ae51fe   Jens Axboe   blk-mq: new multi...
1367

24d2f9030   Christoph Hellwig   blk-mq: split out...
1368
  	if (tags->rqs && set->ops->exit_request) {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1369
  		int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
1370

24d2f9030   Christoph Hellwig   blk-mq: split out...
1371
1372
  		for (i = 0; i < tags->nr_tags; i++) {
  			if (!tags->rqs[i])
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1373
  				continue;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1374
1375
  			set->ops->exit_request(set->driver_data, tags->rqs[i],
  						hctx_idx, i);
a51644054   Jens Axboe   blk-mq: scale dep...
1376
  			tags->rqs[i] = NULL;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1377
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
1378
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1379

24d2f9030   Christoph Hellwig   blk-mq: split out...
1380
1381
  	while (!list_empty(&tags->page_list)) {
  		page = list_first_entry(&tags->page_list, struct page, lru);
6753471c0   Dave Hansen   blk-mq: uses page...
1382
  		list_del_init(&page->lru);
f75782e4e   Catalin Marinas   block: kmemleak: ...
1383
1384
1385
1386
1387
  		/*
  		 * Remove kmemleak object previously allocated in
  		 * blk_mq_init_rq_map().
  		 */
  		kmemleak_free(page_address(page));
320ae51fe   Jens Axboe   blk-mq: new multi...
1388
1389
  		__free_pages(page, page->private);
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
1390
  	kfree(tags->rqs);
320ae51fe   Jens Axboe   blk-mq: new multi...
1391

24d2f9030   Christoph Hellwig   blk-mq: split out...
1392
  	blk_mq_free_tags(tags);
320ae51fe   Jens Axboe   blk-mq: new multi...
1393
1394
1395
1396
  }
  
  static size_t order_to_size(unsigned int order)
  {
4ca085009   Ming Lei   blk-mq: user (1 <...
1397
  	return (size_t)PAGE_SIZE << order;
320ae51fe   Jens Axboe   blk-mq: new multi...
1398
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
1399
1400
  static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
  		unsigned int hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
1401
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1402
  	struct blk_mq_tags *tags;
320ae51fe   Jens Axboe   blk-mq: new multi...
1403
1404
  	unsigned int i, j, entries_per_page, max_order = 4;
  	size_t rq_size, left;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1405
  	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
24391c0dc   Shaohua Li   blk-mq: add tag a...
1406
1407
  				set->numa_node,
  				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
24d2f9030   Christoph Hellwig   blk-mq: split out...
1408
1409
  	if (!tags)
  		return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1410

24d2f9030   Christoph Hellwig   blk-mq: split out...
1411
  	INIT_LIST_HEAD(&tags->page_list);
a51644054   Jens Axboe   blk-mq: scale dep...
1412
1413
1414
  	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
  				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
  				 set->numa_node);
24d2f9030   Christoph Hellwig   blk-mq: split out...
1415
1416
1417
1418
  	if (!tags->rqs) {
  		blk_mq_free_tags(tags);
  		return NULL;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1419
1420
1421
1422
1423
  
  	/*
  	 * rq_size is the size of the request plus driver payload, rounded
  	 * to the cacheline size
  	 */
24d2f9030   Christoph Hellwig   blk-mq: split out...
1424
  	rq_size = round_up(sizeof(struct request) + set->cmd_size,
320ae51fe   Jens Axboe   blk-mq: new multi...
1425
  				cache_line_size());
24d2f9030   Christoph Hellwig   blk-mq: split out...
1426
  	left = rq_size * set->queue_depth;
320ae51fe   Jens Axboe   blk-mq: new multi...
1427

24d2f9030   Christoph Hellwig   blk-mq: split out...
1428
  	for (i = 0; i < set->queue_depth; ) {
320ae51fe   Jens Axboe   blk-mq: new multi...
1429
1430
1431
1432
  		int this_order = max_order;
  		struct page *page;
  		int to_do;
  		void *p;
b3a834b15   Bartlomiej Zolnierkiewicz   blk-mq: fix undef...
1433
  		while (this_order && left < order_to_size(this_order - 1))
320ae51fe   Jens Axboe   blk-mq: new multi...
1434
1435
1436
  			this_order--;
  
  		do {
a51644054   Jens Axboe   blk-mq: scale dep...
1437
  			page = alloc_pages_node(set->numa_node,
ac2111753   Linus Torvalds   blk-mq: initializ...
1438
  				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
a51644054   Jens Axboe   blk-mq: scale dep...
1439
  				this_order);
320ae51fe   Jens Axboe   blk-mq: new multi...
1440
1441
1442
1443
1444
1445
1446
1447
1448
  			if (page)
  				break;
  			if (!this_order--)
  				break;
  			if (order_to_size(this_order) < rq_size)
  				break;
  		} while (1);
  
  		if (!page)
24d2f9030   Christoph Hellwig   blk-mq: split out...
1449
  			goto fail;
320ae51fe   Jens Axboe   blk-mq: new multi...
1450
1451
  
  		page->private = this_order;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1452
  		list_add_tail(&page->lru, &tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1453
1454
  
  		p = page_address(page);
f75782e4e   Catalin Marinas   block: kmemleak: ...
1455
1456
1457
1458
1459
  		/*
  		 * Allow kmemleak to scan these pages as they contain pointers
		 * to additional allocations made via ops->init_request().
  		 */
  		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
320ae51fe   Jens Axboe   blk-mq: new multi...
1460
  		entries_per_page = order_to_size(this_order) / rq_size;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1461
  		to_do = min(entries_per_page, set->queue_depth - i);
320ae51fe   Jens Axboe   blk-mq: new multi...
1462
1463
  		left -= to_do * rq_size;
  		for (j = 0; j < to_do; j++) {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1464
1465
1466
1467
  			tags->rqs[i] = p;
  			if (set->ops->init_request) {
  				if (set->ops->init_request(set->driver_data,
  						tags->rqs[i], hctx_idx, i,
a51644054   Jens Axboe   blk-mq: scale dep...
1468
1469
  						set->numa_node)) {
  					tags->rqs[i] = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1470
  					goto fail;
a51644054   Jens Axboe   blk-mq: scale dep...
1471
  				}
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1472
  			}
320ae51fe   Jens Axboe   blk-mq: new multi...
1473
1474
1475
1476
  			p += rq_size;
  			i++;
  		}
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
1477
  	return tags;
320ae51fe   Jens Axboe   blk-mq: new multi...
1478

24d2f9030   Christoph Hellwig   blk-mq: split out...
1479
  fail:
24d2f9030   Christoph Hellwig   blk-mq: split out...
1480
1481
  	blk_mq_free_rq_map(set, tags, hctx_idx);
  	return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1482
  }
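
/*
 * Worked example with hypothetical sizes: if sizeof(struct request) were
 * 312 and set->cmd_size were 32, rq_size would be round_up(344, 64) == 384
 * bytes. With 4KB pages, a max_order chunk of PAGE_SIZE << 4 == 64KB packs
 * entries_per_page == 65536 / 384 == 170 requests, so a queue depth of 256
 * is carved out of two such chunks (170 requests from the first, the
 * remaining 86 from the second).
 */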
e57690fe0   Jens Axboe   blk-mq: don't ove...
1483
1484
1485
1486
1487
  /*
 * 'cpu' is going away. Splice any existing rq_list entries from this
   * software queue to the hw queue dispatch list, and ensure that it
   * gets run.
   */
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1488
  static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
484b4061e   Jens Axboe   blk-mq: save memo...
1489
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1490
  	struct blk_mq_hw_ctx *hctx;
484b4061e   Jens Axboe   blk-mq: save memo...
1491
1492
  	struct blk_mq_ctx *ctx;
  	LIST_HEAD(tmp);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1493
  	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
e57690fe0   Jens Axboe   blk-mq: don't ove...
1494
  	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
484b4061e   Jens Axboe   blk-mq: save memo...
1495
1496
1497
1498
1499
1500
1501
1502
1503
  
  	spin_lock(&ctx->lock);
  	if (!list_empty(&ctx->rq_list)) {
  		list_splice_init(&ctx->rq_list, &tmp);
  		blk_mq_hctx_clear_pending(hctx, ctx);
  	}
  	spin_unlock(&ctx->lock);
  
  	if (list_empty(&tmp))
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1504
  		return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
1505

e57690fe0   Jens Axboe   blk-mq: don't ove...
1506
1507
1508
  	spin_lock(&hctx->lock);
  	list_splice_tail_init(&tmp, &hctx->dispatch);
  	spin_unlock(&hctx->lock);
484b4061e   Jens Axboe   blk-mq: save memo...
1509
1510
  
  	blk_mq_run_hw_queue(hctx, true);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1511
  	return 0;
484b4061e   Jens Axboe   blk-mq: save memo...
1512
  }
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1513
  static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
484b4061e   Jens Axboe   blk-mq: save memo...
1514
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1515
1516
  	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
  					    &hctx->cpuhp_dead);
484b4061e   Jens Axboe   blk-mq: save memo...
1517
  }
c3b4afca7   Ming Lei   blk-mq: free hctx...
1518
  /* hctx->ctxs will be freed in queue's release handler */
08e98fc60   Ming Lei   blk-mq: handle fa...
1519
1520
1521
1522
  static void blk_mq_exit_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
f70ced091   Ming Lei   blk-mq: support p...
1523
  	unsigned flush_start_tag = set->queue_depth;
08e98fc60   Ming Lei   blk-mq: handle fa...
1524
  	blk_mq_tag_idle(hctx);
f70ced091   Ming Lei   blk-mq: support p...
1525
1526
1527
1528
  	if (set->ops->exit_request)
  		set->ops->exit_request(set->driver_data,
  				       hctx->fq->flush_rq, hctx_idx,
  				       flush_start_tag + hctx_idx);
08e98fc60   Ming Lei   blk-mq: handle fa...
1529
1530
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1531
  	blk_mq_remove_cpuhp(hctx);
f70ced091   Ming Lei   blk-mq: support p...
1532
  	blk_free_flush_queue(hctx->fq);
88459642c   Omar Sandoval   blk-mq: abstract ...
1533
  	sbitmap_free(&hctx->ctx_map);
08e98fc60   Ming Lei   blk-mq: handle fa...
1534
  }
624dbe475   Ming Lei   blk-mq: avoid cod...
1535
1536
1537
1538
1539
1540
1541
1542
1543
  static void blk_mq_exit_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set, int nr_queue)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (i == nr_queue)
  			break;
08e98fc60   Ming Lei   blk-mq: handle fa...
1544
  		blk_mq_exit_hctx(q, set, hctx, i);
624dbe475   Ming Lei   blk-mq: avoid cod...
1545
  	}
624dbe475   Ming Lei   blk-mq: avoid cod...
1546
1547
1548
1549
1550
1551
1552
  }
  
  static void blk_mq_free_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
e09aae7ed   Ming Lei   blk-mq: release m...
1553
  	queue_for_each_hw_ctx(q, hctx, i)
624dbe475   Ming Lei   blk-mq: avoid cod...
1554
  		free_cpumask_var(hctx->cpumask);
624dbe475   Ming Lei   blk-mq: avoid cod...
1555
  }
08e98fc60   Ming Lei   blk-mq: handle fa...
1556
1557
1558
  static int blk_mq_init_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
1559
  {
08e98fc60   Ming Lei   blk-mq: handle fa...
1560
  	int node;
f70ced091   Ming Lei   blk-mq: support p...
1561
  	unsigned flush_start_tag = set->queue_depth;
08e98fc60   Ming Lei   blk-mq: handle fa...
1562
1563
1564
1565
  
  	node = hctx->numa_node;
  	if (node == NUMA_NO_NODE)
  		node = hctx->numa_node = set->numa_node;
27489a3c8   Jens Axboe   blk-mq: turn hctx...
1566
  	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
08e98fc60   Ming Lei   blk-mq: handle fa...
1567
1568
1569
1570
1571
  	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
  	spin_lock_init(&hctx->lock);
  	INIT_LIST_HEAD(&hctx->dispatch);
  	hctx->queue = q;
  	hctx->queue_num = hctx_idx;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1572
  	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
08e98fc60   Ming Lei   blk-mq: handle fa...
1573

9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1574
  	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
08e98fc60   Ming Lei   blk-mq: handle fa...
1575
1576
  
  	hctx->tags = set->tags[hctx_idx];
320ae51fe   Jens Axboe   blk-mq: new multi...
1577
1578
  
  	/*
08e98fc60   Ming Lei   blk-mq: handle fa...
1579
1580
  	 * Allocate space for all possible cpus to avoid allocation at
  	 * runtime
320ae51fe   Jens Axboe   blk-mq: new multi...
1581
  	 */
08e98fc60   Ming Lei   blk-mq: handle fa...
1582
1583
1584
1585
  	hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
  					GFP_KERNEL, node);
  	if (!hctx->ctxs)
  		goto unregister_cpu_notifier;
320ae51fe   Jens Axboe   blk-mq: new multi...
1586

88459642c   Omar Sandoval   blk-mq: abstract ...
1587
1588
  	if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), GFP_KERNEL,
  			      node))
08e98fc60   Ming Lei   blk-mq: handle fa...
1589
  		goto free_ctxs;
320ae51fe   Jens Axboe   blk-mq: new multi...
1590

08e98fc60   Ming Lei   blk-mq: handle fa...
1591
  	hctx->nr_ctx = 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
1592

08e98fc60   Ming Lei   blk-mq: handle fa...
1593
1594
1595
  	if (set->ops->init_hctx &&
  	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
  		goto free_bitmap;
320ae51fe   Jens Axboe   blk-mq: new multi...
1596

f70ced091   Ming Lei   blk-mq: support p...
1597
1598
1599
  	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
  	if (!hctx->fq)
  		goto exit_hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
1600

f70ced091   Ming Lei   blk-mq: support p...
1601
1602
1603
1604
1605
  	if (set->ops->init_request &&
  	    set->ops->init_request(set->driver_data,
  				   hctx->fq->flush_rq, hctx_idx,
  				   flush_start_tag + hctx_idx, node))
  		goto free_fq;
320ae51fe   Jens Axboe   blk-mq: new multi...
1606

08e98fc60   Ming Lei   blk-mq: handle fa...
1607
  	return 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
1608

f70ced091   Ming Lei   blk-mq: support p...
1609
1610
1611
1612
1613
   free_fq:
  	kfree(hctx->fq);
   exit_hctx:
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
08e98fc60   Ming Lei   blk-mq: handle fa...
1614
   free_bitmap:
88459642c   Omar Sandoval   blk-mq: abstract ...
1615
  	sbitmap_free(&hctx->ctx_map);
08e98fc60   Ming Lei   blk-mq: handle fa...
1616
1617
1618
   free_ctxs:
  	kfree(hctx->ctxs);
   unregister_cpu_notifier:
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
1619
  	blk_mq_remove_cpuhp(hctx);
08e98fc60   Ming Lei   blk-mq: handle fa...
1620
1621
  	return -1;
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1622

320ae51fe   Jens Axboe   blk-mq: new multi...
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
  static void blk_mq_init_cpu_queues(struct request_queue *q,
  				   unsigned int nr_hw_queues)
  {
  	unsigned int i;
  
  	for_each_possible_cpu(i) {
  		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
  		struct blk_mq_hw_ctx *hctx;
  
  		memset(__ctx, 0, sizeof(*__ctx));
  		__ctx->cpu = i;
  		spin_lock_init(&__ctx->lock);
  		INIT_LIST_HEAD(&__ctx->rq_list);
  		__ctx->queue = q;
  
  		/* If the cpu isn't online, the cpu is mapped to first hctx */
320ae51fe   Jens Axboe   blk-mq: new multi...
1639
1640
  		if (!cpu_online(i))
  			continue;
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
1641
  		hctx = blk_mq_map_queue(q, i);
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1642

320ae51fe   Jens Axboe   blk-mq: new multi...
1643
1644
1645
1646
1647
  		/*
  		 * Set local node, IFF we have more than one hw queue. If
  		 * not, we remain on the home node of the device
  		 */
  		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
bffed4571   Raghavendra K T   blk-mq: Avoid mem...
1648
  			hctx->numa_node = local_memory_node(cpu_to_node(i));
320ae51fe   Jens Axboe   blk-mq: new multi...
1649
1650
  	}
  }
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1651
1652
  static void blk_mq_map_swqueue(struct request_queue *q,
  			       const struct cpumask *online_mask)
320ae51fe   Jens Axboe   blk-mq: new multi...
1653
1654
1655
1656
  {
  	unsigned int i;
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1657
  	struct blk_mq_tag_set *set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
1658

60de074ba   Akinobu Mita   blk-mq: fix deadl...
1659
1660
1661
1662
  	/*
	 * Avoid others reading incomplete hctx->cpumask through sysfs
  	 */
  	mutex_lock(&q->sysfs_lock);
320ae51fe   Jens Axboe   blk-mq: new multi...
1663
  	queue_for_each_hw_ctx(q, hctx, i) {
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1664
  		cpumask_clear(hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
1665
1666
1667
1668
1669
1670
  		hctx->nr_ctx = 0;
  	}
  
  	/*
  	 * Map software to hardware queues
  	 */
897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
1671
  	for_each_possible_cpu(i) {
320ae51fe   Jens Axboe   blk-mq: new multi...
1672
  		/* If the cpu isn't online, the cpu is mapped to first hctx */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1673
  		if (!cpumask_test_cpu(i, online_mask))
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1674
  			continue;
897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
1675
  		ctx = per_cpu_ptr(q->queue_ctx, i);
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
1676
  		hctx = blk_mq_map_queue(q, i);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1677

e4043dcf3   Jens Axboe   blk-mq: ensure th...
1678
  		cpumask_set_cpu(i, hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
1679
1680
1681
  		ctx->index_hw = hctx->nr_ctx;
  		hctx->ctxs[hctx->nr_ctx++] = ctx;
  	}
506e931f9   Jens Axboe   blk-mq: add basic...
1682

60de074ba   Akinobu Mita   blk-mq: fix deadl...
1683
  	mutex_unlock(&q->sysfs_lock);
506e931f9   Jens Axboe   blk-mq: add basic...
1684
  	queue_for_each_hw_ctx(q, hctx, i) {
484b4061e   Jens Axboe   blk-mq: save memo...
1685
  		/*
a68aafa5b   Jens Axboe   blk-mq: correct a...
1686
1687
  		 * If no software queues are mapped to this hardware queue,
  		 * disable it and free the request entries.
484b4061e   Jens Axboe   blk-mq: save memo...
1688
1689
  		 */
  		if (!hctx->nr_ctx) {
484b4061e   Jens Axboe   blk-mq: save memo...
1690
1691
1692
  			if (set->tags[i]) {
  				blk_mq_free_rq_map(set, set->tags[i], i);
  				set->tags[i] = NULL;
484b4061e   Jens Axboe   blk-mq: save memo...
1693
  			}
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1694
  			hctx->tags = NULL;
484b4061e   Jens Axboe   blk-mq: save memo...
1695
1696
  			continue;
  		}
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1697
1698
1699
1700
1701
  		/* unmapped hw queue can be remapped after CPU topo changed */
  		if (!set->tags[i])
  			set->tags[i] = blk_mq_init_rq_map(set, i);
  		hctx->tags = set->tags[i];
  		WARN_ON(!hctx->tags);
484b4061e   Jens Axboe   blk-mq: save memo...
1702
  		/*
889fa31f0   Chong Yuan   blk-mq: reduce un...
1703
1704
1705
1706
  		 * Set the map size to the number of mapped software queues.
  		 * This is more accurate and more efficient than looping
  		 * over all possibly mapped software queues.
  		 */
88459642c   Omar Sandoval   blk-mq: abstract ...
1707
  		sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
889fa31f0   Chong Yuan   blk-mq: reduce un...
1708
1709
  
  		/*
484b4061e   Jens Axboe   blk-mq: save memo...
1710
1711
  		 * Initialize batch roundrobin counts
  		 */
506e931f9   Jens Axboe   blk-mq: add basic...
1712
1713
1714
  		hctx->next_cpu = cpumask_first(hctx->cpumask);
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1715
  }
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1716
  static void queue_set_hctx_shared(struct request_queue *q, bool shared)
0d2602ca3   Jens Axboe   blk-mq: improve s...
1717
1718
  {
  	struct blk_mq_hw_ctx *hctx;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1719
  	int i;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (shared)
  			hctx->flags |= BLK_MQ_F_TAG_SHARED;
  		else
  			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
  	}
  }
  
  static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
  {
  	struct request_queue *q;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1731
1732
1733
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_freeze_queue(q);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1734
  		queue_set_hctx_shared(q, shared);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1735
1736
1737
1738
1739
1740
1741
  		blk_mq_unfreeze_queue(q);
  	}
  }
  
  static void blk_mq_del_queue_tag_set(struct request_queue *q)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1742
1743
  	mutex_lock(&set->tag_list_lock);
  	list_del_init(&q->tag_set_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1744
1745
1746
1747
1748
1749
  	if (list_is_singular(&set->tag_list)) {
  		/* just transitioned to unshared */
  		set->flags &= ~BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, false);
  	}
0d2602ca3   Jens Axboe   blk-mq: improve s...
1750
  	mutex_unlock(&set->tag_list_lock);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1751
1752
1753
1754
1755
1756
1757
1758
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
  				     struct request_queue *q)
  {
  	q->tag_set = set;
  
  	mutex_lock(&set->tag_list_lock);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1759
1760
1761
1762
1763
1764
1765
1766
1767
  
  	/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
  	if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
  		set->flags |= BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, true);
  	}
  	if (set->flags & BLK_MQ_F_TAG_SHARED)
  		queue_set_hctx_shared(q, true);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1768
  	list_add_tail(&q->tag_set_list, &set->tag_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1769

0d2602ca3   Jens Axboe   blk-mq: improve s...
1770
1771
  	mutex_unlock(&set->tag_list_lock);
  }
e09aae7ed   Ming Lei   blk-mq: release m...
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
  /*
 * This is the actual release handler for mq, but we call it from the
 * request queue's release handler to avoid use-after-free headaches:
 * q->mq_kobj shouldn't have been introduced, but we can't group the
 * ctx/kctx kobjects without it.
   */
  void blk_mq_release(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	/* hctx kobj stays in hctx */
c3b4afca7   Ming Lei   blk-mq: free hctx...
1784
1785
1786
1787
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (!hctx)
  			continue;
  		kfree(hctx->ctxs);
e09aae7ed   Ming Lei   blk-mq: release m...
1788
  		kfree(hctx);
c3b4afca7   Ming Lei   blk-mq: free hctx...
1789
  	}
e09aae7ed   Ming Lei   blk-mq: release m...
1790

a723bab3d   Akinobu Mita   blk-mq: Fix use a...
1791
  	q->mq_map = NULL;
e09aae7ed   Ming Lei   blk-mq: release m...
1792
1793
1794
1795
1796
  	kfree(q->queue_hw_ctx);
  
  	/* ctx kobj stays in queue_ctx */
  	free_percpu(q->queue_ctx);
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
1797
  struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
320ae51fe   Jens Axboe   blk-mq: new multi...
1798
  {
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
  	struct request_queue *uninit_q, *q;
  
  	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
  	if (!uninit_q)
  		return ERR_PTR(-ENOMEM);
  
  	q = blk_mq_init_allocated_queue(set, uninit_q);
  	if (IS_ERR(q))
  		blk_cleanup_queue(uninit_q);
  
  	return q;
  }
  EXPORT_SYMBOL(blk_mq_init_queue);
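
/*
 * Illustrative sketch, not part of blk-mq: how a driver might set up a tag
 * set and request queue around blk_mq_alloc_tag_set() and
 * blk_mq_init_queue(). example_mq_ops, example_mq_setup and the chosen
 * depth/flag values are assumptions for illustration only; example_queue_rq
 * refers to the sketch near blk_mq_direct_issue_request() above.
 */
static struct blk_mq_ops example_mq_ops __maybe_unused = {
	.queue_rq	= example_queue_rq,
};

static __maybe_unused struct request_queue *
example_mq_setup(struct blk_mq_tag_set *set)
{
	struct request_queue *q;
	int ret;

	memset(set, 0, sizeof(*set));
	set->ops = &example_mq_ops;
	set->nr_hw_queues = 1;			/* one hardware queue */
	set->queue_depth = 64;			/* tags per hardware queue */
	set->numa_node = NUMA_NO_NODE;
	set->cmd_size = 0;			/* no per-request driver payload */
	set->flags = BLK_MQ_F_SHOULD_MERGE;

	ret = blk_mq_alloc_tag_set(set);
	if (ret)
		return ERR_PTR(ret);

	q = blk_mq_init_queue(set);
	if (IS_ERR(q))
		blk_mq_free_tag_set(set);

	return q;
}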
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1812
1813
  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
  						struct request_queue *q)
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
1814
  {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1815
1816
  	int i, j;
  	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1817

868f2f0b7   Keith Busch   blk-mq: dynamic h...
1818
  	blk_mq_sysfs_unregister(q);
24d2f9030   Christoph Hellwig   blk-mq: split out...
1819
  	for (i = 0; i < set->nr_hw_queues; i++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1820
  		int node;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1821

868f2f0b7   Keith Busch   blk-mq: dynamic h...
1822
1823
1824
1825
  		if (hctxs[i])
  			continue;
  
  		node = blk_mq_hw_queue_to_node(q->mq_map, i);
cdef54dd8   Christoph Hellwig   blk-mq: remove al...
1826
1827
  		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
  					GFP_KERNEL, node);
320ae51fe   Jens Axboe   blk-mq: new multi...
1828
  		if (!hctxs[i])
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1829
  			break;
320ae51fe   Jens Axboe   blk-mq: new multi...
1830

a86073e48   Jens Axboe   blk-mq: allocate ...
1831
  		if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1832
1833
1834
1835
1836
  						node)) {
  			kfree(hctxs[i]);
  			hctxs[i] = NULL;
  			break;
  		}
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1837

0d2602ca3   Jens Axboe   blk-mq: improve s...
1838
  		atomic_set(&hctxs[i]->nr_active, 0);
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1839
  		hctxs[i]->numa_node = node;
320ae51fe   Jens Axboe   blk-mq: new multi...
1840
  		hctxs[i]->queue_num = i;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1841
1842
1843
1844
1845
1846
1847
1848
  
  		if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
  			free_cpumask_var(hctxs[i]->cpumask);
  			kfree(hctxs[i]);
  			hctxs[i] = NULL;
  			break;
  		}
  		blk_mq_hctx_kobj_init(hctxs[i]);
320ae51fe   Jens Axboe   blk-mq: new multi...
1849
  	}
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
  	for (j = i; j < q->nr_hw_queues; j++) {
  		struct blk_mq_hw_ctx *hctx = hctxs[j];
  
  		if (hctx) {
  			if (hctx->tags) {
  				blk_mq_free_rq_map(set, hctx->tags, j);
  				set->tags[j] = NULL;
  			}
  			blk_mq_exit_hctx(q, set, hctx, j);
  			free_cpumask_var(hctx->cpumask);
  			kobject_put(&hctx->kobj);
  			kfree(hctx->ctxs);
  			kfree(hctx);
  			hctxs[j] = NULL;
  
  		}
  	}
  	q->nr_hw_queues = i;
  	blk_mq_sysfs_register(q);
  }
  
  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
  						  struct request_queue *q)
  {
668416721   Ming Lei   blk-mq: mark requ...
1874
1875
  	/* mark the queue as mq asap */
  	q->mq_ops = set->ops;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1876
1877
  	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
  	if (!q->queue_ctx)
c7de57263   Ming Lin   blk-mq: clear q->...
1878
  		goto err_exit;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1879
1880
1881
1882
1883
  
  	q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
  						GFP_KERNEL, set->numa_node);
  	if (!q->queue_hw_ctx)
  		goto err_percpu;
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
1884
  	q->mq_map = set->mq_map;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1885
1886
1887
1888
  
  	blk_mq_realloc_hw_ctxs(set, q);
  	if (!q->nr_hw_queues)
  		goto err_hctxs;
320ae51fe   Jens Axboe   blk-mq: new multi...
1889

287922eb0   Christoph Hellwig   block: defer time...
1890
  	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
e56f698bd   Ming Lei   blk-mq: set defau...
1891
  	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
320ae51fe   Jens Axboe   blk-mq: new multi...
1892
1893
  
  	q->nr_queues = nr_cpu_ids;
320ae51fe   Jens Axboe   blk-mq: new multi...
1894

94eddfbea   Jens Axboe   blk-mq: ensure th...
1895
  	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
320ae51fe   Jens Axboe   blk-mq: new multi...
1896

05f1dd531   Jens Axboe   block: add queue ...
1897
1898
  	if (!(set->flags & BLK_MQ_F_SG_MERGE))
  		q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
1be036e94   Christoph Hellwig   blk-mq: initializ...
1899
  	q->sg_reserved_size = INT_MAX;
2849450ad   Mike Snitzer   blk-mq: introduce...
1900
  	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
1901
1902
  	INIT_LIST_HEAD(&q->requeue_list);
  	spin_lock_init(&q->requeue_lock);
07068d5b8   Jens Axboe   blk-mq: split mak...
1903
1904
1905
1906
  	if (q->nr_hw_queues > 1)
  		blk_queue_make_request(q, blk_mq_make_request);
  	else
  		blk_queue_make_request(q, blk_sq_make_request);
eba717682   Jens Axboe   blk-mq: initializ...
1907
1908
1909
1910
  	/*
  	 * Do this after blk_queue_make_request() overrides it...
  	 */
  	q->nr_requests = set->queue_depth;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1911
1912
  	if (set->ops->complete)
  		blk_queue_softirq_done(q, set->ops->complete);
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
1913

24d2f9030   Christoph Hellwig   blk-mq: split out...
1914
  	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1915

5778322e6   Akinobu Mita   blk-mq: avoid ins...
1916
  	get_online_cpus();
320ae51fe   Jens Axboe   blk-mq: new multi...
1917
  	mutex_lock(&all_q_mutex);
320ae51fe   Jens Axboe   blk-mq: new multi...
1918

4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
1919
  	list_add_tail(&q->all_q_node, &all_q_list);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1920
  	blk_mq_add_queue_tag_set(set, q);
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1921
  	blk_mq_map_swqueue(q, cpu_online_mask);
484b4061e   Jens Axboe   blk-mq: save memo...
1922

4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
1923
  	mutex_unlock(&all_q_mutex);
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1924
  	put_online_cpus();
4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
1925

320ae51fe   Jens Axboe   blk-mq: new multi...
1926
  	return q;
18741986a   Christoph Hellwig   blk-mq: rework fl...
1927

320ae51fe   Jens Axboe   blk-mq: new multi...
1928
  err_hctxs:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1929
  	kfree(q->queue_hw_ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1930
  err_percpu:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1931
  	free_percpu(q->queue_ctx);
c7de57263   Ming Lin   blk-mq: clear q->...
1932
1933
  err_exit:
  	q->mq_ops = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1934
1935
  	return ERR_PTR(-ENOMEM);
  }
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
1936
  EXPORT_SYMBOL(blk_mq_init_allocated_queue);
320ae51fe   Jens Axboe   blk-mq: new multi...
1937
1938
1939
  
  void blk_mq_free_queue(struct request_queue *q)
  {
624dbe475   Ming Lei   blk-mq: avoid cod...
1940
  	struct blk_mq_tag_set	*set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
1941

0e6263682   Akinobu Mita   blk-mq: fix q->mq...
1942
1943
1944
  	mutex_lock(&all_q_mutex);
  	list_del_init(&q->all_q_node);
  	mutex_unlock(&all_q_mutex);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1945
  	blk_mq_del_queue_tag_set(q);
624dbe475   Ming Lei   blk-mq: avoid cod...
1946
1947
  	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
  	blk_mq_free_hw_queues(q, set);
320ae51fe   Jens Axboe   blk-mq: new multi...
1948
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1949
1950
  
  /* Basically redo blk_mq_init_queue with queue frozen */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1951
1952
  static void blk_mq_queue_reinit(struct request_queue *q,
  				const struct cpumask *online_mask)
320ae51fe   Jens Axboe   blk-mq: new multi...
1953
  {
4ecd4fef3   Christoph Hellwig   block: use an ato...
1954
  	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
320ae51fe   Jens Axboe   blk-mq: new multi...
1955

67aec14ce   Jens Axboe   blk-mq: make the ...
1956
  	blk_mq_sysfs_unregister(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1957
1958
1959
1960
1961
  	/*
	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
	 * we should change hctx numa_node according to the new topology (this
	 * involves freeing and re-allocating memory, so is it worth doing?)
  	 */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1962
  	blk_mq_map_swqueue(q, online_mask);
320ae51fe   Jens Axboe   blk-mq: new multi...
1963

67aec14ce   Jens Axboe   blk-mq: make the ...
1964
  	blk_mq_sysfs_register(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1965
  }
65d5291ee   Sebastian Andrzej Siewior   blk-mq: Convert t...
1966
1967
1968
1969
1970
1971
1972
1973
  /*
   * New online cpumask which is going to be set in this hotplug event.
 * Declare this cpumask as global because cpu-hotplug operations are
 * invoked one by one and dynamically allocating it could fail.
   */
  static struct cpumask cpuhp_online_new;
  
  static void blk_mq_queue_reinit_work(void)
320ae51fe   Jens Axboe   blk-mq: new multi...
1974
1975
  {
  	struct request_queue *q;
320ae51fe   Jens Axboe   blk-mq: new multi...
1976
1977
  
  	mutex_lock(&all_q_mutex);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
1978
1979
1980
1981
1982
1983
1984
1985
1986
  	/*
  	 * We need to freeze and reinit all existing queues.  Freezing
	 * involves a synchronous wait for an RCU grace period, and doing it
  	 * one by one may take a long time.  Start freezing all queues in
  	 * one swoop and then wait for the completions so that freezing can
  	 * take place in parallel.
  	 */
  	list_for_each_entry(q, &all_q_list, all_q_node)
  		blk_mq_freeze_queue_start(q);
f054b56c9   Ming Lei   blk-mq: fix race ...
1987
  	list_for_each_entry(q, &all_q_list, all_q_node) {
f3af020b9   Tejun Heo   blk-mq: make mq_q...
1988
  		blk_mq_freeze_queue_wait(q);
f054b56c9   Ming Lei   blk-mq: fix race ...
1989
1990
1991
1992
1993
1994
  		/*
  		 * timeout handler can't touch hw queue during the
  		 * reinitialization
  		 */
  		del_timer_sync(&q->timeout);
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1995
  	list_for_each_entry(q, &all_q_list, all_q_node)
65d5291ee   Sebastian Andrzej Siewior   blk-mq: Convert t...
1996
  		blk_mq_queue_reinit(q, &cpuhp_online_new);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
1997
1998
1999
  
  	list_for_each_entry(q, &all_q_list, all_q_node)
  		blk_mq_unfreeze_queue(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
2000
  	mutex_unlock(&all_q_mutex);
65d5291ee   Sebastian Andrzej Siewior   blk-mq: Convert t...
2001
2002
2003
2004
  }
  
  static int blk_mq_queue_reinit_dead(unsigned int cpu)
  {
97a32864e   Sebastian Andrzej Siewior   blk-mq: fixup "Co...
2005
  	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
65d5291ee   Sebastian Andrzej Siewior   blk-mq: Convert t...
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
  	blk_mq_queue_reinit_work();
  	return 0;
  }
  
  /*
   * Before hotadded cpu starts handling requests, new mappings must be
   * established.  Otherwise, these requests in hw queue might never be
   * dispatched.
   *
   * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
   * for CPU0, and ctx1 for CPU1).
   *
   * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
   * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
   *
   * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in
   * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
 * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list
   * is ignored.
   */
  static int blk_mq_queue_reinit_prepare(unsigned int cpu)
  {
  	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
  	cpumask_set_cpu(cpu, &cpuhp_online_new);
  	blk_mq_queue_reinit_work();
  	return 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
2032
  }
a51644054   Jens Axboe   blk-mq: scale dep...
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
  static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	int i;
  
  	for (i = 0; i < set->nr_hw_queues; i++) {
  		set->tags[i] = blk_mq_init_rq_map(set, i);
  		if (!set->tags[i])
  			goto out_unwind;
  	}
  
  	return 0;
  
  out_unwind:
  	while (--i >= 0)
  		blk_mq_free_rq_map(set, set->tags[i], i);
a51644054   Jens Axboe   blk-mq: scale dep...
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
  	return -ENOMEM;
  }
  
  /*
   * Allocate the request maps associated with this tag_set. Note that this
   * may reduce the depth asked for, if memory is tight. set->queue_depth
   * will be updated to reflect the allocated depth.
   */
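/*
 * For example, a driver asking for queue_depth 1024 under memory pressure
 * is retried at 512, 256, ... until __blk_mq_alloc_rq_maps() succeeds or
 * the depth drops below set->reserved_tags + BLK_MQ_TAG_MIN, at which
 * point -ENOMEM is returned.
 */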
  static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	unsigned int depth;
  	int err;
  
  	depth = set->queue_depth;
  	do {
  		err = __blk_mq_alloc_rq_maps(set);
  		if (!err)
  			break;
  
  		set->queue_depth >>= 1;
  		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
  			err = -ENOMEM;
  			break;
  		}
  	} while (set->queue_depth);
  
  	if (!set->queue_depth || err) {
  		pr_err("blk-mq: failed to allocate request map
  ");
  		return -ENOMEM;
  	}
  
  	if (depth != set->queue_depth)
  		pr_info("blk-mq: reduced tag depth (%u -> %u)
  ",
  						depth, set->queue_depth);
  
  	return 0;
  }
a4391c646   Jens Axboe   blk-mq: bump max ...
2087
2088
2089
2090
2091
2092
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
 * requested depth down, if it is too large. In that case, the set
   * value will be stored in set->queue_depth.
   */
24d2f9030   Christoph Hellwig   blk-mq: split out...
2093
2094
  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
  {
da695ba23   Christoph Hellwig   blk-mq: allow the...
2095
  	int ret;
205fb5f5b   Bart Van Assche   blk-mq: add blk_m...
2096
  	BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2097
2098
  	if (!set->nr_hw_queues)
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2099
  	if (!set->queue_depth)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2100
2101
2102
  		return -EINVAL;
  	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
  		return -EINVAL;
7d7e0f90b   Christoph Hellwig   blk-mq: remove ->...
2103
  	if (!set->ops->queue_rq)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2104
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2105
2106
2107
2108
2109
2110
  	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
  		pr_info("blk-mq: reduced tag depth to %u
  ",
  			BLK_MQ_MAX_DEPTH);
  		set->queue_depth = BLK_MQ_MAX_DEPTH;
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
2111

6637fadf2   Shaohua Li   blk-mq: move the ...
2112
2113
2114
2115
2116
2117
2118
2119
2120
  	/*
  	 * If a crashdump is active, then we are potentially in a very
  	 * memory constrained environment. Limit us to 1 queue and
  	 * 64 tags to prevent using too much memory.
  	 */
  	if (is_kdump_kernel()) {
  		set->nr_hw_queues = 1;
  		set->queue_depth = min(64U, set->queue_depth);
  	}
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2121
2122
2123
2124
2125
  	/*
  	 * There is no use for more h/w queues than cpus.
  	 */
  	if (set->nr_hw_queues > nr_cpu_ids)
  		set->nr_hw_queues = nr_cpu_ids;
6637fadf2   Shaohua Li   blk-mq: move the ...
2126

868f2f0b7   Keith Busch   blk-mq: dynamic h...
2127
  	set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
24d2f9030   Christoph Hellwig   blk-mq: split out...
2128
2129
  				 GFP_KERNEL, set->numa_node);
  	if (!set->tags)
a51644054   Jens Axboe   blk-mq: scale dep...
2130
  		return -ENOMEM;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2131

da695ba23   Christoph Hellwig   blk-mq: allow the...
2132
2133
2134
  	ret = -ENOMEM;
  	set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
  			GFP_KERNEL, set->numa_node);
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2135
2136
  	if (!set->mq_map)
  		goto out_free_tags;
da695ba23   Christoph Hellwig   blk-mq: allow the...
2137
2138
2139
2140
2141
2142
2143
2144
2145
  	if (set->ops->map_queues)
  		ret = set->ops->map_queues(set);
  	else
  		ret = blk_mq_map_queues(set);
  	if (ret)
  		goto out_free_mq_map;
  
  	ret = blk_mq_alloc_rq_maps(set);
  	if (ret)
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2146
  		goto out_free_mq_map;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2147

0d2602ca3   Jens Axboe   blk-mq: improve s...
2148
2149
  	mutex_init(&set->tag_list_lock);
  	INIT_LIST_HEAD(&set->tag_list);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2150
  	return 0;
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2151
2152
2153
2154
2155
  
  out_free_mq_map:
  	kfree(set->mq_map);
  	set->mq_map = NULL;
  out_free_tags:
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2156
2157
  	kfree(set->tags);
  	set->tags = NULL;
da695ba23   Christoph Hellwig   blk-mq: allow the...
2158
  	return ret;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2159
2160
2161
2162
2163
2164
  }
  EXPORT_SYMBOL(blk_mq_alloc_tag_set);
  
  void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
  {
  	int i;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2165
  	for (i = 0; i < nr_cpu_ids; i++) {
f42d79ab6   Junichi Nomura   blk-mq: fix use-a...
2166
  		if (set->tags[i])
484b4061e   Jens Axboe   blk-mq: save memo...
2167
2168
  			blk_mq_free_rq_map(set, set->tags[i], i);
  	}
bdd17e75c   Christoph Hellwig   blk-mq: only allo...
2169
2170
  	kfree(set->mq_map);
  	set->mq_map = NULL;
981bd189f   Ming Lei   blk-mq: fix leak ...
2171
  	kfree(set->tags);
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2172
  	set->tags = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2173
2174
  }
  EXPORT_SYMBOL(blk_mq_free_tag_set);
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	struct blk_mq_hw_ctx *hctx;
  	int i, ret;
  
  	if (!set || nr > set->queue_depth)
  		return -EINVAL;
  
  	ret = 0;
  	queue_for_each_hw_ctx(q, hctx, i) {
e9137d4b9   Keith Busch   blk-mq: Fix NULL ...
2186
2187
  		if (!hctx->tags)
  			continue;
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
  		ret = blk_mq_tag_update_depth(hctx->tags, nr);
  		if (ret)
  			break;
  	}
  
  	if (!ret)
  		q->nr_requests = nr;
  
  	return ret;
  }
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
  {
  	struct request_queue *q;
  
  	if (nr_hw_queues > nr_cpu_ids)
  		nr_hw_queues = nr_cpu_ids;
  	if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
  		return;
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list)
  		blk_mq_freeze_queue(q);
  
  	set->nr_hw_queues = nr_hw_queues;
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_realloc_hw_ctxs(set, q);
  
  		if (q->nr_hw_queues > 1)
  			blk_queue_make_request(q, blk_mq_make_request);
  		else
  			blk_queue_make_request(q, blk_sq_make_request);
  
  		blk_mq_queue_reinit(q, cpu_online_mask);
  	}
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list)
  		blk_mq_unfreeze_queue(q);
  }
  EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
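
/*
 * Note: callers of blk_mq_update_nr_hw_queues() do not freeze the queues
 * themselves; the function above freezes every queue in the tag set,
 * reallocates hardware contexts via blk_mq_realloc_hw_ctxs(), switches
 * between the single- and multi-queue make_request paths, remaps the
 * software queues and then unfreezes. A hypothetical caller would simply
 * do blk_mq_update_nr_hw_queues(&dev->tag_set, new_count) after learning
 * the new hardware queue count (dev and new_count are made-up names).
 */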
676141e48   Jens Axboe   blk-mq: don't dum...
2226
2227
2228
2229
2230
2231
2232
2233
2234
  void blk_mq_disable_hotplug(void)
  {
  	mutex_lock(&all_q_mutex);
  }
  
  void blk_mq_enable_hotplug(void)
  {
  	mutex_unlock(&all_q_mutex);
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2235
2236
  static int __init blk_mq_init(void)
  {
9467f8596   Thomas Gleixner   blk-mq/cpu-notif:...
2237
2238
  	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
  				blk_mq_hctx_notify_dead);
320ae51fe   Jens Axboe   blk-mq: new multi...
2239

65d5291ee   Sebastian Andrzej Siewior   blk-mq: Convert t...
2240
2241
2242
  	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
  				  blk_mq_queue_reinit_prepare,
  				  blk_mq_queue_reinit_dead);
320ae51fe   Jens Axboe   blk-mq: new multi...
2243
2244
2245
  	return 0;
  }
  subsys_initcall(blk_mq_init);