block/blk-mq.c
  /*
   * Block multiqueue core code
   *
   * Copyright (C) 2013-2014 Jens Axboe
   * Copyright (C) 2013-2014 Christoph Hellwig
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/kmemleak.h>
  #include <linux/mm.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>
  #include <linux/smp.h>
  #include <linux/llist.h>
  #include <linux/list_sort.h>
  #include <linux/cpu.h>
  #include <linux/cache.h>
  #include <linux/sched/sysctl.h>
  #include <linux/delay.h>
  #include <linux/crash_dump.h>
  
  #include <trace/events/block.h>
  
  #include <linux/blk-mq.h>
  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-tag.h"
  
  static DEFINE_MUTEX(all_q_mutex);
  static LIST_HEAD(all_q_list);
  
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
  /*
   * Check if any of the ctx's have pending work in this hardware queue
   */
  static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
  {
  	unsigned int i;
  	for (i = 0; i < hctx->ctx_map.size; i++)
  		if (hctx->ctx_map.map[i].word)
  			return true;
  
  	return false;
  }
  static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx,
  					      struct blk_mq_ctx *ctx)
  {
  	return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word];
  }
  
  #define CTX_TO_BIT(hctx, ctx)	\
  	((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1))
  /*
   * Mark this ctx as having pending work in this hardware queue
   */
  static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
  				     struct blk_mq_ctx *ctx)
  {
  	struct blk_align_bitmap *bm = get_bm(hctx, ctx);
  
  	if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word))
  		set_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
  }
  
  static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
  				      struct blk_mq_ctx *ctx)
  {
  	struct blk_align_bitmap *bm = get_bm(hctx, ctx);
  
  	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
  }
  void blk_mq_freeze_queue_start(struct request_queue *q)
  {
  	int freeze_depth;

  	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
  	if (freeze_depth == 1) {
  		percpu_ref_kill(&q->q_usage_counter);
  		blk_mq_run_hw_queues(q, false);
  	}
  }
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
  
  static void blk_mq_freeze_queue_wait(struct request_queue *q)
  {
  	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
  }
  /*
   * Guarantee no request is in use, so we can change any data structure of
   * the queue afterward.
   */
  void blk_freeze_queue(struct request_queue *q)
  {
  	/*
  	 * In the !blk_mq case we are only calling this to kill the
  	 * q_usage_counter, otherwise this increases the freeze depth
  	 * and waits for it to return to zero.  For this reason there is
  	 * no blk_unfreeze_queue(), and blk_freeze_queue() is not
  	 * exported to drivers as the only user for unfreeze is blk_mq.
  	 */
  	blk_mq_freeze_queue_start(q);
  	blk_mq_freeze_queue_wait(q);
  }
  
  void blk_mq_freeze_queue(struct request_queue *q)
  {
  	/*
  	 * ...just an alias to keep freeze and unfreeze actions balanced
  	 * in the blk_mq_* namespace
  	 */
  	blk_freeze_queue(q);
  }
  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

  void blk_mq_unfreeze_queue(struct request_queue *q)
  {
  	int freeze_depth;

  	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
  	WARN_ON_ONCE(freeze_depth < 0);
  	if (!freeze_depth) {
  		percpu_ref_reinit(&q->q_usage_counter);
  		wake_up_all(&q->mq_freeze_wq);
  	}
  }
  EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
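
  /*
   * Illustrative driver-side sketch (not part of this file): the freeze
   * pair is used to guarantee that no request is in flight while queue
   * state is changed:
   *
   *	blk_mq_freeze_queue(q);
   *	... reconfigure the queue ...
   *	blk_mq_unfreeze_queue(q);
   *
   * Freezes nest via mq_freeze_depth; only the final unfreeze reinits
   * q_usage_counter and wakes up waiters on mq_freeze_wq.
   */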

  void blk_mq_wake_waiters(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		if (blk_mq_hw_queue_mapped(hctx))
  			blk_mq_tag_wakeup_all(hctx->tags, true);
  
  	/*
  	 * If we are called because the queue has now been marked as
  	 * dying, we need to ensure that processes currently waiting on
  	 * the queue are notified as well.
  	 */
  	wake_up_all(&q->mq_freeze_wq);
  }
  bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
  {
  	return blk_mq_has_free_tags(hctx->tags);
  }
  EXPORT_SYMBOL(blk_mq_can_queue);
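
  /*
   * Fill in the fields of a newly tagged request for the given software
   * queue context. Fields that survive between uses are left alone; the
   * tag itself is assigned by the caller before this is invoked.
   */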
  static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
  			       struct request *rq, int op,
  			       unsigned int op_flags)
  {
  	if (blk_queue_io_stat(q))
  		op_flags |= REQ_IO_STAT;

  	INIT_LIST_HEAD(&rq->queuelist);
  	/* csd/requeue_work/fifo_time is initialized before use */
  	rq->q = q;
  	rq->mq_ctx = ctx;
  	req_set_op_attrs(rq, op, op_flags);
  	/* do not touch atomic flags, it needs atomic ops against the timer */
  	rq->cpu = -1;
  	INIT_HLIST_NODE(&rq->hash);
  	RB_CLEAR_NODE(&rq->rb_node);
  	rq->rq_disk = NULL;
  	rq->part = NULL;
  	rq->start_time = jiffies;
  #ifdef CONFIG_BLK_CGROUP
  	rq->rl = NULL;
  	set_start_time_ns(rq);
  	rq->io_start_time_ns = 0;
  #endif
  	rq->nr_phys_segments = 0;
  #if defined(CONFIG_BLK_DEV_INTEGRITY)
  	rq->nr_integrity_segments = 0;
  #endif
  	rq->special = NULL;
  	/* tag was already set */
  	rq->errors = 0;

  	rq->cmd = rq->__cmd;
  	rq->extra_len = 0;
  	rq->sense_len = 0;
  	rq->resid_len = 0;
  	rq->sense = NULL;
  	INIT_LIST_HEAD(&rq->timeout_list);
  	rq->timeout = 0;
  	rq->end_io = NULL;
  	rq->end_io_data = NULL;
  	rq->next_rq = NULL;
  	ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
  }
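
  /*
   * Grab a tag from the hardware context selected in @data and turn it
   * into an initialized request. Returns NULL when no tag is available
   * (the tag allocator only sleeps if the caller allowed blocking).
   */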
  static struct request *
  __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
  {
  	struct request *rq;
  	unsigned int tag;
  	tag = blk_mq_get_tag(data);
  	if (tag != BLK_MQ_TAG_FAIL) {
  		rq = data->hctx->tags->rqs[tag];

  		if (blk_mq_tag_busy(data->hctx)) {
  			rq->cmd_flags = REQ_MQ_INFLIGHT;
  			atomic_inc(&data->hctx->nr_active);
  		}
  
  		rq->tag = tag;
  		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
  		return rq;
  	}
  
  	return NULL;
  }
  struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
  		unsigned int flags)
  {
  	struct blk_mq_ctx *ctx;
  	struct blk_mq_hw_ctx *hctx;
  	struct request *rq;
  	struct blk_mq_alloc_data alloc_data;
  	int ret;

  	ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
  	if (ret)
  		return ERR_PTR(ret);

  	ctx = blk_mq_get_ctx(q);
  	hctx = q->mq_ops->map_queue(q, ctx->cpu);
  	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);

  	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
  	if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
  		__blk_mq_run_hw_queue(hctx);
  		blk_mq_put_ctx(ctx);
  
  		ctx = blk_mq_get_ctx(q);
  		hctx = q->mq_ops->map_queue(q, ctx->cpu);
  		blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
  		rq =  __blk_mq_alloc_request(&alloc_data, rw, 0);
  		ctx = alloc_data.ctx;
  	}
  	blk_mq_put_ctx(ctx);
  	if (!rq) {
  		blk_queue_exit(q);
  		return ERR_PTR(-EWOULDBLOCK);
  	}
  
  	rq->__data_len = 0;
  	rq->__sector = (sector_t) -1;
  	rq->bio = rq->biotail = NULL;
  	return rq;
  }
  EXPORT_SYMBOL(blk_mq_alloc_request);
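
  /*
   * Illustrative caller sketch (not part of this file): allocating a
   * request for an internal command without sleeping:
   *
   *	rq = blk_mq_alloc_request(q, READ, BLK_MQ_REQ_NOWAIT);
   *	if (IS_ERR(rq))
   *		return PTR_ERR(rq);
   *
   * Note that failure is reported as an ERR_PTR(), never as NULL.
   */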

  struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
  		unsigned int flags, unsigned int hctx_idx)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  	struct request *rq;
  	struct blk_mq_alloc_data alloc_data;
  	int ret;
  
  	/*
  	 * If the tag allocator sleeps we could get an allocation for a
  	 * different hardware context.  No need to complicate the low level
  	 * allocator for this for the rare use case of a command tied to
  	 * a specific queue.
  	 */
  	if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
  		return ERR_PTR(-EINVAL);
  
  	if (hctx_idx >= q->nr_hw_queues)
  		return ERR_PTR(-EIO);
  
  	ret = blk_queue_enter(q, true);
  	if (ret)
  		return ERR_PTR(ret);
  
  	hctx = q->queue_hw_ctx[hctx_idx];
  	ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
  
  	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
  	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
  	if (!rq) {
  		blk_queue_exit(q);
  		return ERR_PTR(-EWOULDBLOCK);
  	}
  
  	return rq;
  }
  EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
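
  /*
   * Release a request back to the tag set: drop the shared-tag
   * accounting, clear the STARTED flag, return the tag and finally drop
   * the queue usage reference taken at allocation time.
   */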
  static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
  				  struct blk_mq_ctx *ctx, struct request *rq)
  {
  	const int tag = rq->tag;
  	struct request_queue *q = rq->q;
  	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
  		atomic_dec(&hctx->nr_active);
  	rq->cmd_flags = 0;

  	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
  	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
  	blk_queue_exit(q);
  }
  void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  
  	ctx->rq_completed[rq_is_sync(rq)]++;
  	__blk_mq_free_request(hctx, ctx, rq);
  
  }
  EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
  
  void blk_mq_free_request(struct request *rq)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct request_queue *q = rq->q;
  
  	hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
  	blk_mq_free_hctx_request(hctx, rq);
  }
  EXPORT_SYMBOL_GPL(blk_mq_free_request);

  inline void __blk_mq_end_request(struct request *rq, int error)
  {
  	blk_account_io_done(rq);
  	if (rq->end_io) {
  		rq->end_io(rq, error);
  	} else {
  		if (unlikely(blk_bidi_rq(rq)))
  			blk_mq_free_request(rq->next_rq);
  		blk_mq_free_request(rq);
  	}
  }
  EXPORT_SYMBOL(__blk_mq_end_request);

  void blk_mq_end_request(struct request *rq, int error)
  {
  	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
  		BUG();
  	__blk_mq_end_request(rq, error);
  }
  EXPORT_SYMBOL(blk_mq_end_request);
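
  /*
   * Completion plumbing: when QUEUE_FLAG_SAME_COMP is set, the completion
   * is bounced back to the submitting CPU (or one sharing its cache) with
   * a single-function IPI; otherwise the softirq done handler runs on the
   * completing CPU.
   */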

  static void __blk_mq_complete_request_remote(void *data)
  {
  	struct request *rq = data;

  	rq->q->softirq_done_fn(rq);
  }

  static void blk_mq_ipi_complete_request(struct request *rq)
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	bool shared = false;
  	int cpu;
  	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
  		rq->q->softirq_done_fn(rq);
  		return;
  	}
  
  	cpu = get_cpu();
  	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
  		shared = cpus_share_cache(cpu, ctx->cpu);
  
  	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
  		rq->csd.func = __blk_mq_complete_request_remote;
  		rq->csd.info = rq;
  		rq->csd.flags = 0;
  		smp_call_function_single_async(ctx->cpu, &rq->csd);
  	} else {
  		rq->q->softirq_done_fn(rq);
  	}
  	put_cpu();
  }

  static void __blk_mq_complete_request(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  
  	if (!q->softirq_done_fn)
  		blk_mq_end_request(rq, rq->errors);
  	else
  		blk_mq_ipi_complete_request(rq);
  }
  /**
   * blk_mq_complete_request - end I/O on a request
   * @rq:		the request being processed
   *
   * Description:
   *	Ends all I/O on a request. It does not handle partial completions.
   *	The actual completion happens out-of-order, through an IPI handler.
   **/
  void blk_mq_complete_request(struct request *rq, int error)
  {
  	struct request_queue *q = rq->q;
  
  	if (unlikely(blk_should_fake_timeout(q)))
  		return;
  	if (!blk_mark_rq_complete(rq)) {
  		rq->errors = error;
  		__blk_mq_complete_request(rq);
  	}
  }
  EXPORT_SYMBOL(blk_mq_complete_request);
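
  /*
   * Illustrative driver-side sketch (not part of this file): an interrupt
   * handler typically hands a finished request straight to the core,
   * passing its own status code:
   *
   *	blk_mq_complete_request(rq, error);
   *
   * Partial completions remain the driver's responsibility.
   */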

  int blk_mq_request_started(struct request *rq)
  {
  	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
  }
  EXPORT_SYMBOL_GPL(blk_mq_request_started);
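
  /*
   * Called by the driver when it starts processing a request: arm the
   * timeout timer and flip the STARTED/COMPLETE flags so a requeue or
   * timeout racing with completion is handled correctly.
   */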
  void blk_mq_start_request(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  
  	trace_block_rq_issue(q, rq);
  	rq->resid_len = blk_rq_bytes(rq);
  	if (unlikely(blk_bidi_rq(rq)))
  		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);

  	blk_add_timer(rq);
  
  	/*
  	 * Ensure that ->deadline is visible before we set the started
  	 * flag and clear the completed flag.
  	 */
  	smp_mb__before_atomic();
  
  	/*
  	 * Mark us as started and clear complete. Complete might have been
  	 * set if requeue raced with timeout, which then marked it as
  	 * complete. So be sure to clear complete again when we start
  	 * the request, otherwise we'll ignore the completion event.
  	 */
  	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
  		set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
  	if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
  		clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
  
  	if (q->dma_drain_size && blk_rq_bytes(rq)) {
  		/*
  		 * Make sure space for the drain appears.  We know we can do
  		 * this because max_hw_segments has been adjusted to be one
  		 * fewer than the device can handle.
  		 */
  		rq->nr_phys_segments++;
  	}
  }
  EXPORT_SYMBOL(blk_mq_start_request);

  static void __blk_mq_requeue_request(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  
  	trace_block_rq_requeue(q, rq);

  	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
  		if (q->dma_drain_size && blk_rq_bytes(rq))
  			rq->nr_phys_segments--;
  	}
  }
  void blk_mq_requeue_request(struct request *rq)
  {
  	__blk_mq_requeue_request(rq);

  	BUG_ON(blk_queued_rq(rq));
  	blk_mq_add_to_requeue_list(rq, true);
  }
  EXPORT_SYMBOL(blk_mq_requeue_request);
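
  /*
   * Worker that drains q->requeue_list: requests carrying REQ_SOFTBARRIER
   * are re-inserted at the head of their queue, the rest at the tail, and
   * stopped hardware queues are kicked afterwards.
   */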
  static void blk_mq_requeue_work(struct work_struct *work)
  {
  	struct request_queue *q =
  		container_of(work, struct request_queue, requeue_work);
  	LIST_HEAD(rq_list);
  	struct request *rq, *next;
  	unsigned long flags;
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	list_splice_init(&q->requeue_list, &rq_list);
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  
  	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
  		if (!(rq->cmd_flags & REQ_SOFTBARRIER))
  			continue;
  
  		rq->cmd_flags &= ~REQ_SOFTBARRIER;
  		list_del_init(&rq->queuelist);
  		blk_mq_insert_request(rq, true, false, false);
  	}
  
  	while (!list_empty(&rq_list)) {
  		rq = list_entry(rq_list.next, struct request, queuelist);
  		list_del_init(&rq->queuelist);
  		blk_mq_insert_request(rq, false, false, false);
  	}
  	/*
  	 * Use the start variant of queue running here, so that running
  	 * the requeue work will kick stopped queues.
  	 */
  	blk_mq_start_hw_queues(q);
  }
  
  void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
  {
  	struct request_queue *q = rq->q;
  	unsigned long flags;
  
  	/*
  	 * We abuse this flag that is otherwise used by the I/O scheduler to
  	 * request head insertion from the workqueue.
  	 */
  	BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	if (at_head) {
  		rq->cmd_flags |= REQ_SOFTBARRIER;
  		list_add(&rq->queuelist, &q->requeue_list);
  	} else {
  		list_add_tail(&rq->queuelist, &q->requeue_list);
  	}
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  }
  EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
  void blk_mq_cancel_requeue_work(struct request_queue *q)
  {
  	cancel_work_sync(&q->requeue_work);
  }
  EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
  void blk_mq_kick_requeue_list(struct request_queue *q)
  {
  	kblockd_schedule_work(&q->requeue_work);
  }
  EXPORT_SYMBOL(blk_mq_kick_requeue_list);
  void blk_mq_abort_requeue_list(struct request_queue *q)
  {
  	unsigned long flags;
  	LIST_HEAD(rq_list);
  
  	spin_lock_irqsave(&q->requeue_lock, flags);
  	list_splice_init(&q->requeue_list, &rq_list);
  	spin_unlock_irqrestore(&q->requeue_lock, flags);
  
  	while (!list_empty(&rq_list)) {
  		struct request *rq;
  
  		rq = list_first_entry(&rq_list, struct request, queuelist);
  		list_del_init(&rq->queuelist);
  		rq->errors = -EIO;
  		blk_mq_end_request(rq, rq->errors);
  	}
  }
  EXPORT_SYMBOL(blk_mq_abort_requeue_list);
  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
  {
  	if (tag < tags->nr_tags)
  		return tags->rqs[tag];
  
  	return NULL;
  }
  EXPORT_SYMBOL(blk_mq_tag_to_rq);
  struct blk_mq_timeout_data {
  	unsigned long next;
  	unsigned int next_set;
  };
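
  /*
   * Handle a request whose deadline has expired. The driver's ->timeout
   * callback decides whether the request is completed, has its timer
   * re-armed, or is left entirely to the driver (BLK_EH_NOT_HANDLED).
   */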
  void blk_mq_rq_timed_out(struct request *req, bool reserved)
  {
  	struct blk_mq_ops *ops = req->q->mq_ops;
  	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
  
  	/*
  	 * We know that complete is set at this point. If STARTED isn't set
  	 * anymore, then the request isn't active and the "timeout" should
  	 * just be ignored. This can happen due to the bitflag ordering.
  	 * Timeout first checks if STARTED is set, and if it is, assumes
  	 * the request is active. But if we race with completion, then
  	 * both flags will get cleared. So check here again, and ignore
  	 * a timeout event with a request that isn't active.
  	 */
  	if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags))
  		return;

  	if (ops->timeout)
  		ret = ops->timeout(req, reserved);
  
  	switch (ret) {
  	case BLK_EH_HANDLED:
  		__blk_mq_complete_request(req);
  		break;
  	case BLK_EH_RESET_TIMER:
  		blk_add_timer(req);
  		blk_clear_rq_complete(req);
  		break;
  	case BLK_EH_NOT_HANDLED:
  		break;
  	default:
  		printk(KERN_ERR "block: bad eh return: %d\n", ret);
  		break;
  	}
  }

  static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
  		struct request *rq, void *priv, bool reserved)
  {
  	struct blk_mq_timeout_data *data = priv;

  	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
  		/*
  		 * If a request wasn't started before the queue was
  		 * marked dying, kill it here or it'll go unnoticed.
  		 */
  		if (unlikely(blk_queue_dying(rq->q))) {
  			rq->errors = -EIO;
  			blk_mq_end_request(rq, rq->errors);
  		}
  		return;
  	}

  	if (time_after_eq(jiffies, rq->deadline)) {
  		if (!blk_mark_rq_complete(rq))
  			blk_mq_rq_timed_out(rq, reserved);
  	} else if (!data->next_set || time_after(data->next, rq->deadline)) {
  		data->next = rq->deadline;
  		data->next_set = 1;
  	}
  }
  static void blk_mq_timeout_work(struct work_struct *work)
  {
  	struct request_queue *q =
  		container_of(work, struct request_queue, timeout_work);
  	struct blk_mq_timeout_data data = {
  		.next		= 0,
  		.next_set	= 0,
  	};
  	int i;

  	/* A deadlock might occur if a request is stuck requiring a
  	 * timeout at the same time a queue freeze is waiting
  	 * completion, since the timeout code would not be able to
  	 * acquire the queue reference here.
  	 *
  	 * That's why we don't use blk_queue_enter here; instead, we use
  	 * percpu_ref_tryget directly, because we need to be able to
  	 * obtain a reference even in the short window between the queue
  	 * starting to freeze, by dropping the first reference in
  	 * blk_mq_freeze_queue_start, and the moment the last request is
  	 * consumed, marked by the instant q_usage_counter reaches
  	 * zero.
  	 */
  	if (!percpu_ref_tryget(&q->q_usage_counter))
  		return;
  	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);

  	if (data.next_set) {
  		data.next = blk_rq_timeout(round_jiffies_up(data.next));
  		mod_timer(&q->timeout, data.next);
  	} else {
  		struct blk_mq_hw_ctx *hctx;
  		queue_for_each_hw_ctx(q, hctx, i) {
  			/* the hctx may be unmapped, so check it here */
  			if (blk_mq_hw_queue_mapped(hctx))
  				blk_mq_tag_idle(hctx);
  		}
  	}
  	blk_queue_exit(q);
  }
  
  /*
   * Reverse check our software queue for entries that we could potentially
   * merge with. Currently includes a hand-wavy stop count of 8, to not spend
   * too much time checking for merges.
   */
  static bool blk_mq_attempt_merge(struct request_queue *q,
  				 struct blk_mq_ctx *ctx, struct bio *bio)
  {
  	struct request *rq;
  	int checked = 8;
  
  	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
  		int el_ret;
  
  		if (!checked--)
  			break;
  
  		if (!blk_rq_merge_ok(rq, bio))
  			continue;
  
  		el_ret = blk_try_merge(rq, bio);
  		if (el_ret == ELEVATOR_BACK_MERGE) {
  			if (bio_attempt_back_merge(q, rq, bio)) {
  				ctx->rq_merged++;
  				return true;
  			}
  			break;
  		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
  			if (bio_attempt_front_merge(q, rq, bio)) {
  				ctx->rq_merged++;
  				return true;
  			}
  			break;
  		}
  	}
  
  	return false;
  }
  /*
   * Process software queues that have been marked busy, splicing them
   * to the for-dispatch list.
   */
  static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
  {
  	struct blk_mq_ctx *ctx;
  	int i;
  	for (i = 0; i < hctx->ctx_map.size; i++) {
  		struct blk_align_bitmap *bm = &hctx->ctx_map.map[i];
  		unsigned int off, bit;
  
  		if (!bm->word)
  			continue;
  
  		bit = 0;
  		off = i * hctx->ctx_map.bits_per_word;
  		do {
  			bit = find_next_bit(&bm->word, bm->depth, bit);
  			if (bit >= bm->depth)
  				break;
  
  			ctx = hctx->ctxs[bit + off];
  			clear_bit(bit, &bm->word);
  			spin_lock(&ctx->lock);
  			list_splice_tail_init(&ctx->rq_list, list);
  			spin_unlock(&ctx->lock);
  
  			bit++;
  		} while (1);
  	}
  }
  
  /*
   * Run this hardware queue, pulling any software queues mapped to it in.
   * Note that this function currently has various problems around ordering
   * of IO. In particular, we'd like FIFO behaviour on handling existing
   * items on the hctx->dispatch list. Ignore that for now.
   */
  static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	struct request_queue *q = hctx->queue;
  	struct request *rq;
  	LIST_HEAD(rq_list);
  	LIST_HEAD(driver_list);
  	struct list_head *dptr;
  	int queued;

  	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
  		return;
  	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
  		cpu_online(hctx->next_cpu));
  	hctx->run++;
  
  	/*
  	 * Touch any software queue that has pending entries.
  	 */
  	flush_busy_ctxs(hctx, &rq_list);
  
  	/*
  	 * If we have previous entries on our dispatch list, grab them
  	 * and stuff them at the front for more fair dispatch.
  	 */
  	if (!list_empty_careful(&hctx->dispatch)) {
  		spin_lock(&hctx->lock);
  		if (!list_empty(&hctx->dispatch))
  			list_splice_init(&hctx->dispatch, &rq_list);
  		spin_unlock(&hctx->lock);
  	}
  
  	/*
  	 * Start off with dptr being NULL, so we start the first request
  	 * immediately, even if we have more pending.
  	 */
  	dptr = NULL;
  
  	/*
  	 * Now process all the entries, sending them to the driver.
  	 */
  	queued = 0;
  	while (!list_empty(&rq_list)) {
  		struct blk_mq_queue_data bd;
  		int ret;
  
  		rq = list_first_entry(&rq_list, struct request, queuelist);
  		list_del_init(&rq->queuelist);

  		bd.rq = rq;
  		bd.list = dptr;
  		bd.last = list_empty(&rq_list);
  
  		ret = q->mq_ops->queue_rq(hctx, &bd);
  		switch (ret) {
  		case BLK_MQ_RQ_QUEUE_OK:
  			queued++;
  			break;
  		case BLK_MQ_RQ_QUEUE_BUSY:
  			list_add(&rq->queuelist, &rq_list);
  			__blk_mq_requeue_request(rq);
  			break;
  		default:
  			pr_err("blk-mq: bad return on queue: %d\n", ret);
  		case BLK_MQ_RQ_QUEUE_ERROR:
  			rq->errors = -EIO;
  			blk_mq_end_request(rq, rq->errors);
  			break;
  		}
  
  		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
  			break;
  
  		/*
  		 * We've done the first request. If we have more than 1
  		 * left in the list, set dptr to defer issue.
  		 */
  		if (!dptr && rq_list.next != rq_list.prev)
  			dptr = &driver_list;
  	}
  
  	if (!queued)
  		hctx->dispatched[0]++;
  	else if (queued < (1 << (BLK_MQ_MAX_DISPATCH_ORDER - 1)))
  		hctx->dispatched[ilog2(queued) + 1]++;
  
  	/*
  	 * Any items that need requeuing? Stuff them into hctx->dispatch,
  	 * that is where we will continue on next queue run.
  	 */
  	if (!list_empty(&rq_list)) {
  		spin_lock(&hctx->lock);
  		list_splice(&rq_list, &hctx->dispatch);
  		spin_unlock(&hctx->lock);
  		/*
  		 * the queue is expected to be stopped with BLK_MQ_RQ_QUEUE_BUSY, but
  		 * it's possible the queue is stopped and restarted again
  		 * before this. Queue restart will dispatch requests. And since
  		 * requests in rq_list aren't added into hctx->dispatch yet,
  		 * the requests in rq_list might get lost.
  		 *
  		 * blk_mq_run_hw_queue() already checks the STOPPED bit
  		 **/
  		blk_mq_run_hw_queue(hctx, true);
  	}
  }
  /*
   * It'd be great if the workqueue API had a way to pass
   * in a mask and had some smarts for more clever placement.
   * For now we just round-robin here, switching for every
   * BLK_MQ_CPU_WORK_BATCH queued items.
   */
  static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
  {
  	if (hctx->queue->nr_hw_queues == 1)
  		return WORK_CPU_UNBOUND;
  
  	if (--hctx->next_cpu_batch <= 0) {
  		int cpu = hctx->next_cpu, next_cpu;
  
  		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
  		if (next_cpu >= nr_cpu_ids)
  			next_cpu = cpumask_first(hctx->cpumask);
  
  		hctx->next_cpu = next_cpu;
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  
  		return cpu;
  	}
  	return hctx->next_cpu;
  }
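
  /*
   * Run a hardware queue: directly when the current CPU is part of the
   * hctx cpumask, otherwise by punting the work to kblockd on one of the
   * mapped CPUs.
   */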
  void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
  {
  	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
  	    !blk_mq_hw_queue_mapped(hctx)))
  		return;
  	if (!async) {
  		int cpu = get_cpu();
  		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
  			__blk_mq_run_hw_queue(hctx);
  			put_cpu();
  			return;
  		}

  		put_cpu();
  	}

  	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  			&hctx->run_work, 0);
  }
  void blk_mq_run_hw_queues(struct request_queue *q, bool async)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if ((!blk_mq_hctx_has_pending(hctx) &&
  		    list_empty_careful(&hctx->dispatch)) ||
  		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
  			continue;
  		blk_mq_run_hw_queue(hctx, async);
  	}
  }
  EXPORT_SYMBOL(blk_mq_run_hw_queues);
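
  /*
   * The stop/start helpers below toggle BLK_MQ_S_STOPPED. Stopping also
   * cancels pending run/delay work; (re)starting clears the bit and runs
   * the queue so requests queued in the meantime are not left behind.
   */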
  
  void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	cancel_delayed_work(&hctx->run_work);
  	cancel_delayed_work(&hctx->delay_work);
  	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queue);
  void blk_mq_stop_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_stop_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_stop_hw_queues);
  void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
  {
  	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);

  	blk_mq_run_hw_queue(hctx, false);
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queue);
  void blk_mq_start_hw_queues(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i)
  		blk_mq_start_hw_queue(hctx);
  }
  EXPORT_SYMBOL(blk_mq_start_hw_queues);
  void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
  {
  	struct blk_mq_hw_ctx *hctx;
  	int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
  			continue;
  
  		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
  		blk_mq_run_hw_queue(hctx, async);
  	}
  }
  EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
  static void blk_mq_run_work_fn(struct work_struct *work)
  {
  	struct blk_mq_hw_ctx *hctx;
  	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);

  	__blk_mq_run_hw_queue(hctx);
  }
  static void blk_mq_delay_work_fn(struct work_struct *work)
  {
  	struct blk_mq_hw_ctx *hctx;
  
  	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
  
  	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
  		__blk_mq_run_hw_queue(hctx);
  }
  
  void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
  {
  	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
  		return;

  	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
  			&hctx->delay_work, msecs_to_jiffies(msecs));
  }
  EXPORT_SYMBOL(blk_mq_delay_queue);
  static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
  					    struct request *rq,
  					    bool at_head)
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	trace_block_rq_insert(hctx->queue, rq);
  	if (at_head)
  		list_add(&rq->queuelist, &ctx->rq_list);
  	else
  		list_add_tail(&rq->queuelist, &ctx->rq_list);
  }

  static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
  				    struct request *rq, bool at_head)
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	__blk_mq_insert_req_list(hctx, rq, at_head);
  	blk_mq_hctx_mark_pending(hctx, ctx);
  }
  void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
  			   bool async)
  {
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	struct request_queue *q = rq->q;
  	struct blk_mq_hw_ctx *hctx;

  	hctx = q->mq_ops->map_queue(q, ctx->cpu);
  	spin_lock(&ctx->lock);
  	__blk_mq_insert_request(hctx, rq, at_head);
  	spin_unlock(&ctx->lock);

  	if (run_queue)
  		blk_mq_run_hw_queue(hctx, async);
  }
  
  static void blk_mq_insert_requests(struct request_queue *q,
  				     struct blk_mq_ctx *ctx,
  				     struct list_head *list,
  				     int depth,
  				     bool from_schedule)
  
  {
  	struct blk_mq_hw_ctx *hctx;
  
  	trace_block_unplug(q, depth, !from_schedule);
  	hctx = q->mq_ops->map_queue(q, ctx->cpu);
  
  	/*
  	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
  	 * offline now
  	 */
  	spin_lock(&ctx->lock);
  	while (!list_empty(list)) {
  		struct request *rq;
  
  		rq = list_first_entry(list, struct request, queuelist);
  		BUG_ON(rq->mq_ctx != ctx);
  		list_del_init(&rq->queuelist);
  		__blk_mq_insert_req_list(hctx, rq, false);
  	}
  	blk_mq_hctx_mark_pending(hctx, ctx);
  	spin_unlock(&ctx->lock);
  	blk_mq_run_hw_queue(hctx, from_schedule);
  }
  
  static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
  {
  	struct request *rqa = container_of(a, struct request, queuelist);
  	struct request *rqb = container_of(b, struct request, queuelist);
  
  	return !(rqa->mq_ctx < rqb->mq_ctx ||
  		 (rqa->mq_ctx == rqb->mq_ctx &&
  		  blk_rq_pos(rqa) < blk_rq_pos(rqb)));
  }
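
  /*
   * Flush a task's plug list: sort the plugged requests so that requests
   * for the same software queue are adjacent, then insert each batch
   * under a single ctx lock round trip and kick the matching hardware
   * queue.
   */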
  
  void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  {
  	struct blk_mq_ctx *this_ctx;
  	struct request_queue *this_q;
  	struct request *rq;
  	LIST_HEAD(list);
  	LIST_HEAD(ctx_list);
  	unsigned int depth;
  
  	list_splice_init(&plug->mq_list, &list);
  
  	list_sort(NULL, &list, plug_ctx_cmp);
  
  	this_q = NULL;
  	this_ctx = NULL;
  	depth = 0;
  
  	while (!list_empty(&list)) {
  		rq = list_entry_rq(list.next);
  		list_del_init(&rq->queuelist);
  		BUG_ON(!rq->q);
  		if (rq->mq_ctx != this_ctx) {
  			if (this_ctx) {
  				blk_mq_insert_requests(this_q, this_ctx,
  							&ctx_list, depth,
  							from_schedule);
  			}
  
  			this_ctx = rq->mq_ctx;
  			this_q = rq->q;
  			depth = 0;
  		}
  
  		depth++;
  		list_add_tail(&rq->queuelist, &ctx_list);
  	}
  
  	/*
  	 * If 'this_ctx' is set, we know we have entries to complete
  	 * on 'ctx_list'. Do those.
  	 */
  	if (this_ctx) {
  		blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
  				       from_schedule);
  	}
  }
  
  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
  {
  	init_request_from_bio(rq, bio);

  	blk_account_io_start(rq, 1);
  }
  static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
  {
  	return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
  		!blk_queue_nomerges(hctx->queue);
  }
  static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
  					 struct blk_mq_ctx *ctx,
  					 struct request *rq, struct bio *bio)
  {
  	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
  		blk_mq_bio_to_request(rq, bio);
  		spin_lock(&ctx->lock);
  insert_rq:
  		__blk_mq_insert_request(hctx, rq, false);
  		spin_unlock(&ctx->lock);
  		return false;
  	} else {
  		struct request_queue *q = hctx->queue;
  		spin_lock(&ctx->lock);
  		if (!blk_mq_attempt_merge(q, ctx, bio)) {
  			blk_mq_bio_to_request(rq, bio);
  			goto insert_rq;
  		}

  		spin_unlock(&ctx->lock);
  		__blk_mq_free_request(hctx, ctx, rq);
  		return true;
  	}
  }

  struct blk_map_ctx {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  };
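
  /*
   * Map a bio to its software/hardware queue pair and allocate a request
   * for it, retrying once in blocking mode (after running the hardware
   * queue) if the tag space is exhausted.
   */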
  
  static struct request *blk_mq_map_request(struct request_queue *q,
  					  struct bio *bio,
  					  struct blk_map_ctx *data)
  {
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
  	struct request *rq;
  	int op = bio_data_dir(bio);
  	int op_flags = 0;
  	struct blk_mq_alloc_data alloc_data;

  	blk_queue_enter_live(q);
  	ctx = blk_mq_get_ctx(q);
  	hctx = q->mq_ops->map_queue(q, ctx->cpu);
  	if (rw_is_sync(bio_op(bio), bio->bi_opf))
  		op_flags |= REQ_SYNC;

cc6e3b109   Mike Christie   block: prepare mq...
1168
  	trace_block_getrq(q, bio, op);
6f3b0e8bc   Christoph Hellwig   blk-mq: add a fla...
1169
  	blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
cc6e3b109   Mike Christie   block: prepare mq...
1170
  	rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
5dee85772   Christoph Hellwig   blk-mq: initializ...
1171
  	if (unlikely(!rq)) {
793597a6a   Christoph Hellwig   blk-mq: do not us...
1172
  		__blk_mq_run_hw_queue(hctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
1173
  		blk_mq_put_ctx(ctx);
cc6e3b109   Mike Christie   block: prepare mq...
1174
  		trace_block_sleeprq(q, bio, op);
793597a6a   Christoph Hellwig   blk-mq: do not us...
1175
1176
  
  		ctx = blk_mq_get_ctx(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1177
  		hctx = q->mq_ops->map_queue(q, ctx->cpu);
6f3b0e8bc   Christoph Hellwig   blk-mq: add a fla...
1178
  		blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
cc6e3b109   Mike Christie   block: prepare mq...
1179
  		rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
cb96a42cc   Ming Lei   blk-mq: fix sched...
1180
1181
  		ctx = alloc_data.ctx;
  		hctx = alloc_data.hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
1182
1183
1184
  	}
  
  	hctx->queued++;
07068d5b8   Jens Axboe   blk-mq: split mak...
1185
1186
1187
1188
  	data->hctx = hctx;
  	data->ctx = ctx;
  	return rq;
  }
7b371636f   Jens Axboe   blk-mq: return ta...
1189
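/*
 * Issue a request directly to the driver via ->queue_rq(). Returns 0 if
 * the driver accepted the request or it was terminated with an error (in
 * both cases *cookie is set), or -1 if the driver was busy, in which case
 * the request has been unprepared again and the caller must insert it
 * through the normal path.
 */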
  static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
f984df1f0   Shaohua Li   blk-mq: do limite...
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
  {
  	int ret;
  	struct request_queue *q = rq->q;
  	struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
  			rq->mq_ctx->cpu);
  	struct blk_mq_queue_data bd = {
  		.rq = rq,
  		.list = NULL,
  		.last = 1
  	};
7b371636f   Jens Axboe   blk-mq: return ta...
1200
  	blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num);
f984df1f0   Shaohua Li   blk-mq: do limite...
1201
1202
1203
1204
1205
1206
1207
  
  	/*
	 * If the driver returns OK, we are done. On a hard error, kill the
	 * request. Any other return (busy) means the request is added back
	 * to our list, as we previously would have done.
  	 */
  	ret = q->mq_ops->queue_rq(hctx, &bd);
7b371636f   Jens Axboe   blk-mq: return ta...
1208
1209
  	if (ret == BLK_MQ_RQ_QUEUE_OK) {
  		*cookie = new_cookie;
f984df1f0   Shaohua Li   blk-mq: do limite...
1210
  		return 0;
7b371636f   Jens Axboe   blk-mq: return ta...
1211
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1212

7b371636f   Jens Axboe   blk-mq: return ta...
1213
1214
1215
1216
1217
1218
1219
  	__blk_mq_requeue_request(rq);
  
  	if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
  		*cookie = BLK_QC_T_NONE;
  		rq->errors = -EIO;
  		blk_mq_end_request(rq, rq->errors);
  		return 0;
f984df1f0   Shaohua Li   blk-mq: do limite...
1220
  	}
7b371636f   Jens Axboe   blk-mq: return ta...
1221
1222
  
  	return -1;
f984df1f0   Shaohua Li   blk-mq: do limite...
1223
  }
07068d5b8   Jens Axboe   blk-mq: split mak...
1224
1225
1226
1227
1228
  /*
 * Multiple hardware queue variant. This only does limited per-process
 * plugging, and will attempt to bypass the hctx queueing if we can go
 * straight to hardware for SYNC IO.
   */
dece16353   Jens Axboe   block: change ->m...
1229
  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
1230
  {
1eff9d322   Jens Axboe   block: rename bio...
1231
1232
  	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
  	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
07068d5b8   Jens Axboe   blk-mq: split mak...
1233
1234
  	struct blk_map_ctx data;
  	struct request *rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
1235
1236
  	unsigned int request_count = 0;
  	struct blk_plug *plug;
5b3f341f0   Shaohua Li   blk-mq: make plug...
1237
  	struct request *same_queue_rq = NULL;
7b371636f   Jens Axboe   blk-mq: return ta...
1238
  	blk_qc_t cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1239
1240
1241
1242
  
  	blk_queue_bounce(q, &bio);
  
  	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
4246a0b63   Christoph Hellwig   block: add a bi_e...
1243
  		bio_io_error(bio);
dece16353   Jens Axboe   block: change ->m...
1244
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1245
  	}
54efd50bf   Kent Overstreet   block: make gener...
1246
  	blk_queue_split(q, &bio, q->bio_split);
87c279e61   Omar Sandoval   blk-mq: really fi...
1247
1248
1249
  	if (!is_flush_fua && !blk_queue_nomerges(q) &&
  	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
  		return BLK_QC_T_NONE;
f984df1f0   Shaohua Li   blk-mq: do limite...
1250

07068d5b8   Jens Axboe   blk-mq: split mak...
1251
1252
  	rq = blk_mq_map_request(q, bio, &data);
  	if (unlikely(!rq))
dece16353   Jens Axboe   block: change ->m...
1253
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1254

7b371636f   Jens Axboe   blk-mq: return ta...
1255
  	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
07068d5b8   Jens Axboe   blk-mq: split mak...
1256
1257
1258
1259
1260
1261
  
  	if (unlikely(is_flush_fua)) {
  		blk_mq_bio_to_request(rq, bio);
  		blk_insert_flush(rq);
  		goto run_queue;
  	}
f984df1f0   Shaohua Li   blk-mq: do limite...
1262
  	plug = current->plug;
e167dfb53   Jens Axboe   blk-mq: add BLK_M...
1263
1264
1265
1266
1267
  	/*
	 * If the driver supports deferred issue based on 'last', then
  	 * queue it up like normal since we can potentially save some
  	 * CPU this way.
  	 */
f984df1f0   Shaohua Li   blk-mq: do limite...
1268
1269
1270
  	if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
  	    !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
  		struct request *old_rq = NULL;
07068d5b8   Jens Axboe   blk-mq: split mak...
1271
1272
  
  		blk_mq_bio_to_request(rq, bio);
07068d5b8   Jens Axboe   blk-mq: split mak...
1273
1274
  
  		/*
b094f89ca   Jens Axboe   blk-mq: fix calli...
1275
		 * We do limited plugging. If the bio can be merged, do that.
f984df1f0   Shaohua Li   blk-mq: do limite...
1276
1277
		 * Otherwise the existing request in the plug list will be
		 * issued, so the plug list will hold at most one request.
07068d5b8   Jens Axboe   blk-mq: split mak...
1278
  		 */
f984df1f0   Shaohua Li   blk-mq: do limite...
1279
  		if (plug) {
5b3f341f0   Shaohua Li   blk-mq: make plug...
1280
1281
  			/*
  			 * The plug list might get flushed before this. If that
b094f89ca   Jens Axboe   blk-mq: fix calli...
1282
1283
1284
  			 * happens, same_queue_rq is invalid and plug list is
  			 * empty
  			 */
5b3f341f0   Shaohua Li   blk-mq: make plug...
1285
1286
  			if (same_queue_rq && !list_empty(&plug->mq_list)) {
  				old_rq = same_queue_rq;
f984df1f0   Shaohua Li   blk-mq: do limite...
1287
  				list_del_init(&old_rq->queuelist);
07068d5b8   Jens Axboe   blk-mq: split mak...
1288
  			}
f984df1f0   Shaohua Li   blk-mq: do limite...
1289
1290
1291
1292
1293
  			list_add_tail(&rq->queuelist, &plug->mq_list);
  		} else /* is_sync */
  			old_rq = rq;
  		blk_mq_put_ctx(data.ctx);
  		if (!old_rq)
7b371636f   Jens Axboe   blk-mq: return ta...
1294
1295
1296
  			goto done;
  		if (!blk_mq_direct_issue_request(old_rq, &cookie))
  			goto done;
f984df1f0   Shaohua Li   blk-mq: do limite...
1297
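		/*
		 * Direct issue failed because the driver was busy; fall back
		 * to inserting the request and kicking the queue asynchronously.
		 */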
  		blk_mq_insert_request(old_rq, false, true, true);
7b371636f   Jens Axboe   blk-mq: return ta...
1298
  		goto done;
07068d5b8   Jens Axboe   blk-mq: split mak...
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
  	}
  
  	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
  		/*
  		 * For a SYNC request, send it to the hardware immediately. For
  		 * an ASYNC request, just ensure that we run it later on. The
  		 * latter allows for merging opportunities and more efficient
  		 * dispatching.
  		 */
  run_queue:
  		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1311
  	blk_mq_put_ctx(data.ctx);
7b371636f   Jens Axboe   blk-mq: return ta...
1312
1313
  done:
  	return cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1314
1315
1316
1317
1318
1319
  }
  
  /*
   * Single hardware queue variant. This will attempt to use any per-process
   * plug for merging and IO deferral.
   */
dece16353   Jens Axboe   block: change ->m...
1320
  static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
07068d5b8   Jens Axboe   blk-mq: split mak...
1321
  {
1eff9d322   Jens Axboe   block: rename bio...
1322
1323
  	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
  	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1324
1325
  	struct blk_plug *plug;
  	unsigned int request_count = 0;
07068d5b8   Jens Axboe   blk-mq: split mak...
1326
1327
  	struct blk_map_ctx data;
  	struct request *rq;
7b371636f   Jens Axboe   blk-mq: return ta...
1328
  	blk_qc_t cookie;
07068d5b8   Jens Axboe   blk-mq: split mak...
1329

07068d5b8   Jens Axboe   blk-mq: split mak...
1330
1331
1332
  	blk_queue_bounce(q, &bio);
  
  	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
4246a0b63   Christoph Hellwig   block: add a bi_e...
1333
  		bio_io_error(bio);
dece16353   Jens Axboe   block: change ->m...
1334
  		return BLK_QC_T_NONE;
07068d5b8   Jens Axboe   blk-mq: split mak...
1335
  	}
54efd50bf   Kent Overstreet   block: make gener...
1336
  	blk_queue_split(q, &bio, q->bio_split);
87c279e61   Omar Sandoval   blk-mq: really fi...
1337
1338
1339
1340
1341
  	if (!is_flush_fua && !blk_queue_nomerges(q)) {
  		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
  			return BLK_QC_T_NONE;
  	} else
  		request_count = blk_plug_queued_count(q);
07068d5b8   Jens Axboe   blk-mq: split mak...
1342
1343
  
  	rq = blk_mq_map_request(q, bio, &data);
ff87bcec1   Jens Axboe   blk-mq: handle NU...
1344
  	if (unlikely(!rq))
dece16353   Jens Axboe   block: change ->m...
1345
  		return BLK_QC_T_NONE;
320ae51fe   Jens Axboe   blk-mq: new multi...
1346

7b371636f   Jens Axboe   blk-mq: return ta...
1347
  	cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
320ae51fe   Jens Axboe   blk-mq: new multi...
1348
1349
1350
  
  	if (unlikely(is_flush_fua)) {
  		blk_mq_bio_to_request(rq, bio);
320ae51fe   Jens Axboe   blk-mq: new multi...
1351
1352
1353
1354
1355
1356
1357
1358
1359
  		blk_insert_flush(rq);
  		goto run_queue;
  	}
  
  	/*
	 * If a task plug exists, use it. Since this is completely lockless,
	 * we can use it to temporarily store requests until the task is
	 * either done or scheduled away.
  	 */
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1360
1361
1362
  	plug = current->plug;
  	if (plug) {
  		blk_mq_bio_to_request(rq, bio);
676d06077   Ming Lei   blk-mq: fix for t...
1363
  		if (!request_count)
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1364
  			trace_block_plug(q);
b094f89ca   Jens Axboe   blk-mq: fix calli...
1365
1366
1367
1368
  
  		blk_mq_put_ctx(data.ctx);
  
  		if (request_count >= BLK_MAX_REQUEST_COUNT) {
e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1369
1370
  			blk_flush_plug_list(plug, false);
  			trace_block_plug(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
1371
  		}
b094f89ca   Jens Axboe   blk-mq: fix calli...
1372

e6c4438ba   Jeff Moyer   blk-mq: fix plugg...
1373
  		list_add_tail(&rq->queuelist, &plug->mq_list);
7b371636f   Jens Axboe   blk-mq: return ta...
1374
  		return cookie;
320ae51fe   Jens Axboe   blk-mq: new multi...
1375
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1376
1377
1378
1379
1380
1381
1382
1383
1384
  	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
  		/*
  		 * For a SYNC request, send it to the hardware immediately. For
  		 * an ASYNC request, just ensure that we run it later on. The
  		 * latter allows for merging opportunities and more efficient
  		 * dispatching.
  		 */
  run_queue:
  		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
320ae51fe   Jens Axboe   blk-mq: new multi...
1385
  	}
07068d5b8   Jens Axboe   blk-mq: split mak...
1386
  	blk_mq_put_ctx(data.ctx);
7b371636f   Jens Axboe   blk-mq: return ta...
1387
  	return cookie;
320ae51fe   Jens Axboe   blk-mq: new multi...
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
  }
  
  /*
   * Default mapping to a software queue, since we use one per CPU.
   */
  struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
  {
  	return q->queue_hw_ctx[q->mq_map[cpu]];
  }
  EXPORT_SYMBOL(blk_mq_map_queue);
24d2f9030   Christoph Hellwig   blk-mq: split out...
1398
1399
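/*
 * Release all requests of a hardware queue: give the driver a chance to
 * tear down its per-request data via ->exit_request(), free the backing
 * pages the requests were carved from, and free the tag map itself.
 */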
  static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
  		struct blk_mq_tags *tags, unsigned int hctx_idx)
95363efde   Jens Axboe   blk-mq: allow blk...
1400
  {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1401
  	struct page *page;
320ae51fe   Jens Axboe   blk-mq: new multi...
1402

24d2f9030   Christoph Hellwig   blk-mq: split out...
1403
  	if (tags->rqs && set->ops->exit_request) {
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1404
  		int i;
320ae51fe   Jens Axboe   blk-mq: new multi...
1405

24d2f9030   Christoph Hellwig   blk-mq: split out...
1406
1407
  		for (i = 0; i < tags->nr_tags; i++) {
  			if (!tags->rqs[i])
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1408
  				continue;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1409
1410
  			set->ops->exit_request(set->driver_data, tags->rqs[i],
  						hctx_idx, i);
a51644054   Jens Axboe   blk-mq: scale dep...
1411
  			tags->rqs[i] = NULL;
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1412
  		}
320ae51fe   Jens Axboe   blk-mq: new multi...
1413
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1414

24d2f9030   Christoph Hellwig   blk-mq: split out...
1415
1416
  	while (!list_empty(&tags->page_list)) {
  		page = list_first_entry(&tags->page_list, struct page, lru);
6753471c0   Dave Hansen   blk-mq: uses page...
1417
  		list_del_init(&page->lru);
f75782e4e   Catalin Marinas   block: kmemleak: ...
1418
1419
1420
1421
1422
  		/*
  		 * Remove kmemleak object previously allocated in
  		 * blk_mq_init_rq_map().
  		 */
  		kmemleak_free(page_address(page));
320ae51fe   Jens Axboe   blk-mq: new multi...
1423
1424
  		__free_pages(page, page->private);
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
1425
  	kfree(tags->rqs);
320ae51fe   Jens Axboe   blk-mq: new multi...
1426

24d2f9030   Christoph Hellwig   blk-mq: split out...
1427
  	blk_mq_free_tags(tags);
320ae51fe   Jens Axboe   blk-mq: new multi...
1428
1429
1430
1431
  }
  
  static size_t order_to_size(unsigned int order)
  {
4ca085009   Ming Lei   blk-mq: user (1 <...
1432
  	return (size_t)PAGE_SIZE << order;
320ae51fe   Jens Axboe   blk-mq: new multi...
1433
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
1434
1435
  static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
  		unsigned int hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
1436
  {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1437
  	struct blk_mq_tags *tags;
320ae51fe   Jens Axboe   blk-mq: new multi...
1438
1439
  	unsigned int i, j, entries_per_page, max_order = 4;
  	size_t rq_size, left;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1440
  	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
24391c0dc   Shaohua Li   blk-mq: add tag a...
1441
1442
  				set->numa_node,
  				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
24d2f9030   Christoph Hellwig   blk-mq: split out...
1443
1444
  	if (!tags)
  		return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1445

24d2f9030   Christoph Hellwig   blk-mq: split out...
1446
  	INIT_LIST_HEAD(&tags->page_list);
a51644054   Jens Axboe   blk-mq: scale dep...
1447
1448
1449
  	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
  				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
  				 set->numa_node);
24d2f9030   Christoph Hellwig   blk-mq: split out...
1450
1451
1452
1453
  	if (!tags->rqs) {
  		blk_mq_free_tags(tags);
  		return NULL;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1454
1455
1456
1457
1458
  
  	/*
  	 * rq_size is the size of the request plus driver payload, rounded
  	 * to the cacheline size
  	 */
24d2f9030   Christoph Hellwig   blk-mq: split out...
1459
  	rq_size = round_up(sizeof(struct request) + set->cmd_size,
320ae51fe   Jens Axboe   blk-mq: new multi...
1460
  				cache_line_size());
24d2f9030   Christoph Hellwig   blk-mq: split out...
1461
  	left = rq_size * set->queue_depth;
320ae51fe   Jens Axboe   blk-mq: new multi...
1462

24d2f9030   Christoph Hellwig   blk-mq: split out...
1463
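	/*
	 * Carve the requests out of page chunks. Start at max_order and drop
	 * the order both when the remaining requests no longer need a chunk
	 * that large and when a higher-order allocation fails, as long as the
	 * chunk still fits at least one request.
	 */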
  	for (i = 0; i < set->queue_depth; ) {
320ae51fe   Jens Axboe   blk-mq: new multi...
1464
1465
1466
1467
  		int this_order = max_order;
  		struct page *page;
  		int to_do;
  		void *p;
b3a834b15   Bartlomiej Zolnierkiewicz   blk-mq: fix undef...
1468
  		while (this_order && left < order_to_size(this_order - 1))
320ae51fe   Jens Axboe   blk-mq: new multi...
1469
1470
1471
  			this_order--;
  
  		do {
a51644054   Jens Axboe   blk-mq: scale dep...
1472
  			page = alloc_pages_node(set->numa_node,
ac2111753   Linus Torvalds   blk-mq: initializ...
1473
  				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
a51644054   Jens Axboe   blk-mq: scale dep...
1474
  				this_order);
320ae51fe   Jens Axboe   blk-mq: new multi...
1475
1476
1477
1478
1479
1480
1481
1482
1483
  			if (page)
  				break;
  			if (!this_order--)
  				break;
  			if (order_to_size(this_order) < rq_size)
  				break;
  		} while (1);
  
  		if (!page)
24d2f9030   Christoph Hellwig   blk-mq: split out...
1484
  			goto fail;
320ae51fe   Jens Axboe   blk-mq: new multi...
1485
1486
  
  		page->private = this_order;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1487
  		list_add_tail(&page->lru, &tags->page_list);
320ae51fe   Jens Axboe   blk-mq: new multi...
1488
1489
  
  		p = page_address(page);
f75782e4e   Catalin Marinas   block: kmemleak: ...
1490
1491
1492
1493
1494
  		/*
  		 * Allow kmemleak to scan these pages as they contain pointers
		 * to additional allocations made via ops->init_request().
  		 */
  		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
320ae51fe   Jens Axboe   blk-mq: new multi...
1495
  		entries_per_page = order_to_size(this_order) / rq_size;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1496
  		to_do = min(entries_per_page, set->queue_depth - i);
320ae51fe   Jens Axboe   blk-mq: new multi...
1497
1498
  		left -= to_do * rq_size;
  		for (j = 0; j < to_do; j++) {
24d2f9030   Christoph Hellwig   blk-mq: split out...
1499
1500
1501
1502
  			tags->rqs[i] = p;
  			if (set->ops->init_request) {
  				if (set->ops->init_request(set->driver_data,
  						tags->rqs[i], hctx_idx, i,
a51644054   Jens Axboe   blk-mq: scale dep...
1503
1504
  						set->numa_node)) {
  					tags->rqs[i] = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1505
  					goto fail;
a51644054   Jens Axboe   blk-mq: scale dep...
1506
  				}
e9b267d91   Christoph Hellwig   blk-mq: add ->ini...
1507
  			}
320ae51fe   Jens Axboe   blk-mq: new multi...
1508
1509
1510
1511
  			p += rq_size;
  			i++;
  		}
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
1512
  	return tags;
320ae51fe   Jens Axboe   blk-mq: new multi...
1513

24d2f9030   Christoph Hellwig   blk-mq: split out...
1514
  fail:
24d2f9030   Christoph Hellwig   blk-mq: split out...
1515
1516
  	blk_mq_free_rq_map(set, tags, hctx_idx);
  	return NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
1517
  }
1429d7c94   Jens Axboe   blk-mq: switch ct...
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
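/*
 * Per-hctx "pending work" bitmap: nr_cpu_ids bits are split into words of
 * bits_per_word bits, each stored in its own blk_align_bitmap so that
 * neighbouring software queues don't contend on the same word. 'depth'
 * records how many bits of each word are actually used; for example, with
 * nr_cpu_ids == 20 and bits_per_word == 8 the map has three words with
 * depths 8, 8 and 4.
 */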
  static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap)
  {
  	kfree(bitmap->map);
  }
  
  static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
  {
  	unsigned int bpw = 8, total, num_maps, i;
  
  	bitmap->bits_per_word = bpw;
  
  	num_maps = ALIGN(nr_cpu_ids, bpw) / bpw;
  	bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap),
  					GFP_KERNEL, node);
  	if (!bitmap->map)
  		return -ENOMEM;
1429d7c94   Jens Axboe   blk-mq: switch ct...
1534
1535
1536
1537
1538
1539
1540
1541
  	total = nr_cpu_ids;
  	for (i = 0; i < num_maps; i++) {
  		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
  		total -= bitmap->map[i].depth;
  	}
  
  	return 0;
  }
e57690fe0   Jens Axboe   blk-mq: don't ove...
1542
1543
1544
1545
1546
  /*
 * 'cpu' is going away. Splice any existing rq_list entries from this
 * software queue to the hw queue dispatch list, and ensure that the
 * hardware queue gets run.
   */
484b4061e   Jens Axboe   blk-mq: save memo...
1547
1548
  static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
  {
484b4061e   Jens Axboe   blk-mq: save memo...
1549
1550
  	struct blk_mq_ctx *ctx;
  	LIST_HEAD(tmp);
e57690fe0   Jens Axboe   blk-mq: don't ove...
1551
  	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
484b4061e   Jens Axboe   blk-mq: save memo...
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
  
  	spin_lock(&ctx->lock);
  	if (!list_empty(&ctx->rq_list)) {
  		list_splice_init(&ctx->rq_list, &tmp);
  		blk_mq_hctx_clear_pending(hctx, ctx);
  	}
  	spin_unlock(&ctx->lock);
  
  	if (list_empty(&tmp))
  		return NOTIFY_OK;
e57690fe0   Jens Axboe   blk-mq: don't ove...
1562
1563
1564
  	spin_lock(&hctx->lock);
  	list_splice_tail_init(&tmp, &hctx->dispatch);
  	spin_unlock(&hctx->lock);
484b4061e   Jens Axboe   blk-mq: save memo...
1565
1566
  
  	blk_mq_run_hw_queue(hctx, true);
484b4061e   Jens Axboe   blk-mq: save memo...
1567
1568
  	return NOTIFY_OK;
  }
484b4061e   Jens Axboe   blk-mq: save memo...
1569
1570
1571
1572
1573
1574
1575
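/*
 * Per-hctx CPU hotplug callback. On CPU_DEAD the dead CPU's software queue
 * is drained into the hardware queue's dispatch list; CPU online events
 * are handled later, when blk_mq_map_swqueue() rebuilds the mapping.
 */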
  static int blk_mq_hctx_notify(void *data, unsigned long action,
  			      unsigned int cpu)
  {
  	struct blk_mq_hw_ctx *hctx = data;
  
  	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
  		return blk_mq_hctx_cpu_offline(hctx, cpu);
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1576
1577
1578
1579
1580
  
  	/*
  	 * In case of CPU online, tags may be reallocated
  	 * in blk_mq_map_swqueue() after mapping is updated.
  	 */
484b4061e   Jens Axboe   blk-mq: save memo...
1581
1582
1583
  
  	return NOTIFY_OK;
  }
c3b4afca7   Ming Lei   blk-mq: free hctx...
1584
  /* hctx->ctxs will be freed in queue's release handler */
08e98fc60   Ming Lei   blk-mq: handle fa...
1585
1586
1587
1588
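/*
 * Tear down one hardware queue: mark its tags idle, let the driver release
 * the internal flush request and its per-hctx data, unregister the CPU
 * notifier, and free the flush queue and the ctx_map bitmap.
 */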
  static void blk_mq_exit_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
  {
f70ced091   Ming Lei   blk-mq: support p...
1589
  	unsigned flush_start_tag = set->queue_depth;
08e98fc60   Ming Lei   blk-mq: handle fa...
1590
  	blk_mq_tag_idle(hctx);
f70ced091   Ming Lei   blk-mq: support p...
1591
1592
1593
1594
  	if (set->ops->exit_request)
  		set->ops->exit_request(set->driver_data,
  				       hctx->fq->flush_rq, hctx_idx,
  				       flush_start_tag + hctx_idx);
08e98fc60   Ming Lei   blk-mq: handle fa...
1595
1596
1597
1598
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
  
  	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
f70ced091   Ming Lei   blk-mq: support p...
1599
  	blk_free_flush_queue(hctx->fq);
08e98fc60   Ming Lei   blk-mq: handle fa...
1600
1601
  	blk_mq_free_bitmap(&hctx->ctx_map);
  }
624dbe475   Ming Lei   blk-mq: avoid cod...
1602
1603
1604
1605
1606
1607
1608
1609
1610
  static void blk_mq_exit_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set, int nr_queue)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (i == nr_queue)
  			break;
08e98fc60   Ming Lei   blk-mq: handle fa...
1611
  		blk_mq_exit_hctx(q, set, hctx, i);
624dbe475   Ming Lei   blk-mq: avoid cod...
1612
  	}
624dbe475   Ming Lei   blk-mq: avoid cod...
1613
1614
1615
1616
1617
1618
1619
  }
  
  static void blk_mq_free_hw_queues(struct request_queue *q,
  		struct blk_mq_tag_set *set)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
e09aae7ed   Ming Lei   blk-mq: release m...
1620
  	queue_for_each_hw_ctx(q, hctx, i)
624dbe475   Ming Lei   blk-mq: avoid cod...
1621
  		free_cpumask_var(hctx->cpumask);
624dbe475   Ming Lei   blk-mq: avoid cod...
1622
  }
08e98fc60   Ming Lei   blk-mq: handle fa...
1623
1624
1625
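/*
 * Set up one hardware queue: locks, work items, CPU notifier, the ctx map
 * and bitmap, the driver's per-hctx data and the internal flush request.
 * Each failure point unwinds everything initialized before it.
 */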
  static int blk_mq_init_hctx(struct request_queue *q,
  		struct blk_mq_tag_set *set,
  		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
320ae51fe   Jens Axboe   blk-mq: new multi...
1626
  {
08e98fc60   Ming Lei   blk-mq: handle fa...
1627
  	int node;
f70ced091   Ming Lei   blk-mq: support p...
1628
  	unsigned flush_start_tag = set->queue_depth;
08e98fc60   Ming Lei   blk-mq: handle fa...
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
  
  	node = hctx->numa_node;
  	if (node == NUMA_NO_NODE)
  		node = hctx->numa_node = set->numa_node;
  
  	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
  	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
  	spin_lock_init(&hctx->lock);
  	INIT_LIST_HEAD(&hctx->dispatch);
  	hctx->queue = q;
  	hctx->queue_num = hctx_idx;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1640
  	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
08e98fc60   Ming Lei   blk-mq: handle fa...
1641
1642
1643
1644
1645
1646
  
  	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
  					blk_mq_hctx_notify, hctx);
  	blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
  
  	hctx->tags = set->tags[hctx_idx];
320ae51fe   Jens Axboe   blk-mq: new multi...
1647
1648
  
  	/*
08e98fc60   Ming Lei   blk-mq: handle fa...
1649
1650
  	 * Allocate space for all possible cpus to avoid allocation at
  	 * runtime
320ae51fe   Jens Axboe   blk-mq: new multi...
1651
  	 */
08e98fc60   Ming Lei   blk-mq: handle fa...
1652
1653
1654
1655
  	hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
  					GFP_KERNEL, node);
  	if (!hctx->ctxs)
  		goto unregister_cpu_notifier;
320ae51fe   Jens Axboe   blk-mq: new multi...
1656

08e98fc60   Ming Lei   blk-mq: handle fa...
1657
1658
  	if (blk_mq_alloc_bitmap(&hctx->ctx_map, node))
  		goto free_ctxs;
320ae51fe   Jens Axboe   blk-mq: new multi...
1659

08e98fc60   Ming Lei   blk-mq: handle fa...
1660
  	hctx->nr_ctx = 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
1661

08e98fc60   Ming Lei   blk-mq: handle fa...
1662
1663
1664
  	if (set->ops->init_hctx &&
  	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
  		goto free_bitmap;
320ae51fe   Jens Axboe   blk-mq: new multi...
1665

f70ced091   Ming Lei   blk-mq: support p...
1666
1667
1668
  	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
  	if (!hctx->fq)
  		goto exit_hctx;
320ae51fe   Jens Axboe   blk-mq: new multi...
1669

f70ced091   Ming Lei   blk-mq: support p...
1670
1671
1672
1673
1674
  	if (set->ops->init_request &&
  	    set->ops->init_request(set->driver_data,
  				   hctx->fq->flush_rq, hctx_idx,
  				   flush_start_tag + hctx_idx, node))
  		goto free_fq;
320ae51fe   Jens Axboe   blk-mq: new multi...
1675

08e98fc60   Ming Lei   blk-mq: handle fa...
1676
  	return 0;
320ae51fe   Jens Axboe   blk-mq: new multi...
1677

f70ced091   Ming Lei   blk-mq: support p...
1678
1679
1680
1681
1682
   free_fq:
	blk_free_flush_queue(hctx->fq);
   exit_hctx:
  	if (set->ops->exit_hctx)
  		set->ops->exit_hctx(hctx, hctx_idx);
08e98fc60   Ming Lei   blk-mq: handle fa...
1683
1684
1685
1686
1687
1688
   free_bitmap:
  	blk_mq_free_bitmap(&hctx->ctx_map);
   free_ctxs:
  	kfree(hctx->ctxs);
   unregister_cpu_notifier:
  	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
320ae51fe   Jens Axboe   blk-mq: new multi...
1689

08e98fc60   Ming Lei   blk-mq: handle fa...
1690
1691
  	return -1;
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1692

320ae51fe   Jens Axboe   blk-mq: new multi...
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
  static void blk_mq_init_cpu_queues(struct request_queue *q,
  				   unsigned int nr_hw_queues)
  {
  	unsigned int i;
  
  	for_each_possible_cpu(i) {
  		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
  		struct blk_mq_hw_ctx *hctx;
  
  		memset(__ctx, 0, sizeof(*__ctx));
  		__ctx->cpu = i;
  		spin_lock_init(&__ctx->lock);
  		INIT_LIST_HEAD(&__ctx->rq_list);
  		__ctx->queue = q;
  
		/* If the cpu isn't online, the cpu is mapped to the first hctx */
320ae51fe   Jens Axboe   blk-mq: new multi...
1709
1710
  		if (!cpu_online(i))
  			continue;
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1711
  		hctx = q->mq_ops->map_queue(q, i);
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1712

320ae51fe   Jens Axboe   blk-mq: new multi...
1713
1714
1715
1716
1717
  		/*
  		 * Set local node, IFF we have more than one hw queue. If
  		 * not, we remain on the home node of the device
  		 */
  		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
bffed4571   Raghavendra K T   blk-mq: Avoid mem...
1718
  			hctx->numa_node = local_memory_node(cpu_to_node(i));
320ae51fe   Jens Axboe   blk-mq: new multi...
1719
1720
  	}
  }
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1721
1722
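/*
 * Rebuild the software -> hardware queue mapping for the CPUs in
 * 'online_mask'. Hardware queues left with no software queues have their
 * tag maps freed; previously unmapped queues that gain a mapping (e.g.
 * after a CPU comes back online) get their tag maps reallocated.
 */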
  static void blk_mq_map_swqueue(struct request_queue *q,
  			       const struct cpumask *online_mask)
320ae51fe   Jens Axboe   blk-mq: new multi...
1723
1724
1725
1726
  {
  	unsigned int i;
  	struct blk_mq_hw_ctx *hctx;
  	struct blk_mq_ctx *ctx;
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1727
  	struct blk_mq_tag_set *set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
1728

60de074ba   Akinobu Mita   blk-mq: fix deadl...
1729
1730
1731
1732
  	/*
	 * Avoid others reading incomplete hctx->cpumask through sysfs
  	 */
  	mutex_lock(&q->sysfs_lock);
320ae51fe   Jens Axboe   blk-mq: new multi...
1733
  	queue_for_each_hw_ctx(q, hctx, i) {
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1734
  		cpumask_clear(hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
1735
1736
1737
1738
1739
1740
  		hctx->nr_ctx = 0;
  	}
  
  	/*
  	 * Map software to hardware queues
  	 */
897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
1741
  	for_each_possible_cpu(i) {
320ae51fe   Jens Axboe   blk-mq: new multi...
1742
		/* If the cpu isn't online, the cpu is mapped to the first hctx */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1743
  		if (!cpumask_test_cpu(i, online_mask))
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1744
  			continue;
897bb0c7f   Thomas Gleixner   blk-mq: Use prope...
1745
  		ctx = per_cpu_ptr(q->queue_ctx, i);
320ae51fe   Jens Axboe   blk-mq: new multi...
1746
  		hctx = q->mq_ops->map_queue(q, i);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1747

e4043dcf3   Jens Axboe   blk-mq: ensure th...
1748
  		cpumask_set_cpu(i, hctx->cpumask);
320ae51fe   Jens Axboe   blk-mq: new multi...
1749
1750
1751
  		ctx->index_hw = hctx->nr_ctx;
  		hctx->ctxs[hctx->nr_ctx++] = ctx;
  	}
506e931f9   Jens Axboe   blk-mq: add basic...
1752

60de074ba   Akinobu Mita   blk-mq: fix deadl...
1753
  	mutex_unlock(&q->sysfs_lock);
506e931f9   Jens Axboe   blk-mq: add basic...
1754
  	queue_for_each_hw_ctx(q, hctx, i) {
889fa31f0   Chong Yuan   blk-mq: reduce un...
1755
  		struct blk_mq_ctxmap *map = &hctx->ctx_map;
484b4061e   Jens Axboe   blk-mq: save memo...
1756
  		/*
a68aafa5b   Jens Axboe   blk-mq: correct a...
1757
1758
  		 * If no software queues are mapped to this hardware queue,
  		 * disable it and free the request entries.
484b4061e   Jens Axboe   blk-mq: save memo...
1759
1760
  		 */
  		if (!hctx->nr_ctx) {
484b4061e   Jens Axboe   blk-mq: save memo...
1761
1762
1763
  			if (set->tags[i]) {
  				blk_mq_free_rq_map(set, set->tags[i], i);
  				set->tags[i] = NULL;
484b4061e   Jens Axboe   blk-mq: save memo...
1764
  			}
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1765
  			hctx->tags = NULL;
484b4061e   Jens Axboe   blk-mq: save memo...
1766
1767
  			continue;
  		}
2a34c0872   Ming Lei   blk-mq: fix CPU h...
1768
1769
1770
1771
1772
		/* an unmapped hw queue can be remapped after the CPU topology changes */
  		if (!set->tags[i])
  			set->tags[i] = blk_mq_init_rq_map(set, i);
  		hctx->tags = set->tags[i];
  		WARN_ON(!hctx->tags);
e0e827b9f   Raghavendra K T   blk-mq: Reuse har...
1773
  		cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
484b4061e   Jens Axboe   blk-mq: save memo...
1774
  		/*
889fa31f0   Chong Yuan   blk-mq: reduce un...
1775
1776
1777
1778
  		 * Set the map size to the number of mapped software queues.
  		 * This is more accurate and more efficient than looping
  		 * over all possibly mapped software queues.
  		 */
569fd0ce9   Jens Axboe   blk-mq: fix itera...
1779
  		map->size = DIV_ROUND_UP(hctx->nr_ctx, map->bits_per_word);
889fa31f0   Chong Yuan   blk-mq: reduce un...
1780
1781
  
  		/*
484b4061e   Jens Axboe   blk-mq: save memo...
1782
1783
		 * Initialize batch round-robin counts
  		 */
506e931f9   Jens Axboe   blk-mq: add basic...
1784
1785
1786
  		hctx->next_cpu = cpumask_first(hctx->cpumask);
  		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
1787
  }
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1788
  static void queue_set_hctx_shared(struct request_queue *q, bool shared)
0d2602ca3   Jens Axboe   blk-mq: improve s...
1789
1790
  {
  	struct blk_mq_hw_ctx *hctx;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1791
  	int i;
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (shared)
  			hctx->flags |= BLK_MQ_F_TAG_SHARED;
  		else
  			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
  	}
  }
  
  static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
  {
  	struct request_queue *q;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1803
1804
1805
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_freeze_queue(q);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1806
  		queue_set_hctx_shared(q, shared);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1807
1808
1809
1810
1811
1812
1813
  		blk_mq_unfreeze_queue(q);
  	}
  }
  
  static void blk_mq_del_queue_tag_set(struct request_queue *q)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
0d2602ca3   Jens Axboe   blk-mq: improve s...
1814
1815
  	mutex_lock(&set->tag_list_lock);
  	list_del_init(&q->tag_set_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1816
1817
1818
1819
1820
1821
  	if (list_is_singular(&set->tag_list)) {
  		/* just transitioned to unshared */
  		set->flags &= ~BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, false);
  	}
0d2602ca3   Jens Axboe   blk-mq: improve s...
1822
  	mutex_unlock(&set->tag_list_lock);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1823
1824
1825
1826
1827
1828
1829
1830
  }
  
  static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
  				     struct request_queue *q)
  {
  	q->tag_set = set;
  
  	mutex_lock(&set->tag_list_lock);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1831
1832
1833
1834
1835
1836
1837
1838
1839
  
  	/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
  	if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
  		set->flags |= BLK_MQ_F_TAG_SHARED;
  		/* update existing queue */
  		blk_mq_update_tag_set_depth(set, true);
  	}
  	if (set->flags & BLK_MQ_F_TAG_SHARED)
  		queue_set_hctx_shared(q, true);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1840
  	list_add_tail(&q->tag_set_list, &set->tag_list);
2404e607a   Jeff Moyer   blk-mq: avoid exc...
1841

0d2602ca3   Jens Axboe   blk-mq: improve s...
1842
1843
  	mutex_unlock(&set->tag_list_lock);
  }
e09aae7ed   Ming Lei   blk-mq: release m...
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
  /*
 * This is the actual release handler for mq, but we run it from the
 * request queue's release handler to avoid use-after-free headaches:
 * q->mq_kobj shouldn't have been introduced, but we can't group the
 * ctx/kctx kobjects without it.
   */
  void blk_mq_release(struct request_queue *q)
  {
  	struct blk_mq_hw_ctx *hctx;
  	unsigned int i;
  
  	/* hctx kobj stays in hctx */
c3b4afca7   Ming Lei   blk-mq: free hctx...
1856
1857
1858
1859
  	queue_for_each_hw_ctx(q, hctx, i) {
  		if (!hctx)
  			continue;
  		kfree(hctx->ctxs);
e09aae7ed   Ming Lei   blk-mq: release m...
1860
  		kfree(hctx);
c3b4afca7   Ming Lei   blk-mq: free hctx...
1861
  	}
e09aae7ed   Ming Lei   blk-mq: release m...
1862

a723bab3d   Akinobu Mita   blk-mq: Fix use a...
1863
1864
  	kfree(q->mq_map);
  	q->mq_map = NULL;
e09aae7ed   Ming Lei   blk-mq: release m...
1865
1866
1867
1868
1869
  	kfree(q->queue_hw_ctx);
  
  	/* ctx kobj stays in queue_ctx */
  	free_percpu(q->queue_ctx);
  }
24d2f9030   Christoph Hellwig   blk-mq: split out...
1870
  struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
320ae51fe   Jens Axboe   blk-mq: new multi...
1871
  {
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
  	struct request_queue *uninit_q, *q;
  
  	uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
  	if (!uninit_q)
  		return ERR_PTR(-ENOMEM);
  
  	q = blk_mq_init_allocated_queue(set, uninit_q);
  	if (IS_ERR(q))
  		blk_cleanup_queue(uninit_q);
  
  	return q;
  }
  EXPORT_SYMBOL(blk_mq_init_queue);
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1885
1886
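/*
 * (Re)allocate hardware queue contexts up to set->nr_hw_queues, keeping
 * any that already exist. Used both at queue creation time and when the
 * number of hardware queues changes; hctxs beyond the new count are torn
 * down and freed.
 */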
  static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
  						struct request_queue *q)
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
1887
  {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1888
1889
  	int i, j;
  	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1890

868f2f0b7   Keith Busch   blk-mq: dynamic h...
1891
  	blk_mq_sysfs_unregister(q);
24d2f9030   Christoph Hellwig   blk-mq: split out...
1892
  	for (i = 0; i < set->nr_hw_queues; i++) {
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1893
  		int node;
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1894

868f2f0b7   Keith Busch   blk-mq: dynamic h...
1895
1896
1897
1898
  		if (hctxs[i])
  			continue;
  
  		node = blk_mq_hw_queue_to_node(q->mq_map, i);
cdef54dd8   Christoph Hellwig   blk-mq: remove al...
1899
1900
  		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
  					GFP_KERNEL, node);
320ae51fe   Jens Axboe   blk-mq: new multi...
1901
  		if (!hctxs[i])
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1902
  			break;
320ae51fe   Jens Axboe   blk-mq: new multi...
1903

a86073e48   Jens Axboe   blk-mq: allocate ...
1904
  		if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1905
1906
1907
1908
1909
  						node)) {
  			kfree(hctxs[i]);
  			hctxs[i] = NULL;
  			break;
  		}
e4043dcf3   Jens Axboe   blk-mq: ensure th...
1910

0d2602ca3   Jens Axboe   blk-mq: improve s...
1911
  		atomic_set(&hctxs[i]->nr_active, 0);
f14bbe77a   Jens Axboe   blk-mq: pass in s...
1912
  		hctxs[i]->numa_node = node;
320ae51fe   Jens Axboe   blk-mq: new multi...
1913
  		hctxs[i]->queue_num = i;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1914
1915
1916
1917
1918
1919
1920
1921
  
  		if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
  			free_cpumask_var(hctxs[i]->cpumask);
  			kfree(hctxs[i]);
  			hctxs[i] = NULL;
  			break;
  		}
  		blk_mq_hctx_kobj_init(hctxs[i]);
320ae51fe   Jens Axboe   blk-mq: new multi...
1922
  	}
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
  	for (j = i; j < q->nr_hw_queues; j++) {
  		struct blk_mq_hw_ctx *hctx = hctxs[j];
  
  		if (hctx) {
  			if (hctx->tags) {
  				blk_mq_free_rq_map(set, hctx->tags, j);
  				set->tags[j] = NULL;
  			}
  			blk_mq_exit_hctx(q, set, hctx, j);
  			free_cpumask_var(hctx->cpumask);
  			kobject_put(&hctx->kobj);
  			kfree(hctx->ctxs);
  			kfree(hctx);
  			hctxs[j] = NULL;
  
  		}
  	}
  	q->nr_hw_queues = i;
  	blk_mq_sysfs_register(q);
  }
  
  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
  						  struct request_queue *q)
  {
668416721   Ming Lei   blk-mq: mark requ...
1947
1948
  	/* mark the queue as mq asap */
  	q->mq_ops = set->ops;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1949
1950
  	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
  	if (!q->queue_ctx)
c7de57263   Ming Lin   blk-mq: clear q->...
1951
  		goto err_exit;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
  
  	q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
  						GFP_KERNEL, set->numa_node);
  	if (!q->queue_hw_ctx)
  		goto err_percpu;
  
  	q->mq_map = blk_mq_make_queue_map(set);
  	if (!q->mq_map)
  		goto err_map;
  
  	blk_mq_realloc_hw_ctxs(set, q);
  	if (!q->nr_hw_queues)
  		goto err_hctxs;
320ae51fe   Jens Axboe   blk-mq: new multi...
1965

287922eb0   Christoph Hellwig   block: defer time...
1966
  	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
e56f698bd   Ming Lei   blk-mq: set defau...
1967
  	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
320ae51fe   Jens Axboe   blk-mq: new multi...
1968
1969
  
  	q->nr_queues = nr_cpu_ids;
320ae51fe   Jens Axboe   blk-mq: new multi...
1970

94eddfbea   Jens Axboe   blk-mq: ensure th...
1971
  	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
320ae51fe   Jens Axboe   blk-mq: new multi...
1972

05f1dd531   Jens Axboe   block: add queue ...
1973
1974
  	if (!(set->flags & BLK_MQ_F_SG_MERGE))
  		q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
1be036e94   Christoph Hellwig   blk-mq: initializ...
1975
  	q->sg_reserved_size = INT_MAX;
6fca6a611   Christoph Hellwig   blk-mq: add helpe...
1976
1977
1978
  	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
  	INIT_LIST_HEAD(&q->requeue_list);
  	spin_lock_init(&q->requeue_lock);
07068d5b8   Jens Axboe   blk-mq: split mak...
1979
1980
1981
1982
  	if (q->nr_hw_queues > 1)
  		blk_queue_make_request(q, blk_mq_make_request);
  	else
  		blk_queue_make_request(q, blk_sq_make_request);
eba717682   Jens Axboe   blk-mq: initializ...
1983
1984
1985
1986
  	/*
  	 * Do this after blk_queue_make_request() overrides it...
  	 */
  	q->nr_requests = set->queue_depth;
24d2f9030   Christoph Hellwig   blk-mq: split out...
1987
1988
  	if (set->ops->complete)
  		blk_queue_softirq_done(q, set->ops->complete);
30a91cb4e   Christoph Hellwig   blk-mq: rework I/...
1989

24d2f9030   Christoph Hellwig   blk-mq: split out...
1990
  	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
320ae51fe   Jens Axboe   blk-mq: new multi...
1991

5778322e6   Akinobu Mita   blk-mq: avoid ins...
1992
  	get_online_cpus();
320ae51fe   Jens Axboe   blk-mq: new multi...
1993
  	mutex_lock(&all_q_mutex);
320ae51fe   Jens Axboe   blk-mq: new multi...
1994

4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
1995
  	list_add_tail(&q->all_q_node, &all_q_list);
0d2602ca3   Jens Axboe   blk-mq: improve s...
1996
  	blk_mq_add_queue_tag_set(set, q);
5778322e6   Akinobu Mita   blk-mq: avoid ins...
1997
  	blk_mq_map_swqueue(q, cpu_online_mask);
484b4061e   Jens Axboe   blk-mq: save memo...
1998

4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
1999
  	mutex_unlock(&all_q_mutex);
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2000
  	put_online_cpus();
4593fdbe7   Akinobu Mita   blk-mq: fix sysfs...
2001

320ae51fe   Jens Axboe   blk-mq: new multi...
2002
  	return q;
18741986a   Christoph Hellwig   blk-mq: rework fl...
2003

320ae51fe   Jens Axboe   blk-mq: new multi...
2004
  err_hctxs:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2005
  	kfree(q->mq_map);
f14bbe77a   Jens Axboe   blk-mq: pass in s...
2006
  err_map:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2007
  	kfree(q->queue_hw_ctx);
320ae51fe   Jens Axboe   blk-mq: new multi...
2008
  err_percpu:
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2009
  	free_percpu(q->queue_ctx);
c7de57263   Ming Lin   blk-mq: clear q->...
2010
2011
  err_exit:
  	q->mq_ops = NULL;
320ae51fe   Jens Axboe   blk-mq: new multi...
2012
2013
  	return ERR_PTR(-ENOMEM);
  }
b62c21b71   Mike Snitzer   blk-mq: add blk_m...
2014
  EXPORT_SYMBOL(blk_mq_init_allocated_queue);
320ae51fe   Jens Axboe   blk-mq: new multi...
2015
2016
2017
  
  void blk_mq_free_queue(struct request_queue *q)
  {
624dbe475   Ming Lei   blk-mq: avoid cod...
2018
  	struct blk_mq_tag_set	*set = q->tag_set;
320ae51fe   Jens Axboe   blk-mq: new multi...
2019

0e6263682   Akinobu Mita   blk-mq: fix q->mq...
2020
2021
2022
  	mutex_lock(&all_q_mutex);
  	list_del_init(&q->all_q_node);
  	mutex_unlock(&all_q_mutex);
0d2602ca3   Jens Axboe   blk-mq: improve s...
2023
  	blk_mq_del_queue_tag_set(q);
624dbe475   Ming Lei   blk-mq: avoid cod...
2024
2025
  	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
  	blk_mq_free_hw_queues(q, set);
320ae51fe   Jens Axboe   blk-mq: new multi...
2026
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2027
2028
  
  /* Basically redo blk_mq_init_queue with queue frozen */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2029
2030
  static void blk_mq_queue_reinit(struct request_queue *q,
  				const struct cpumask *online_mask)
320ae51fe   Jens Axboe   blk-mq: new multi...
2031
  {
4ecd4fef3   Christoph Hellwig   block: use an ato...
2032
  	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
320ae51fe   Jens Axboe   blk-mq: new multi...
2033

67aec14ce   Jens Axboe   blk-mq: make the ...
2034
  	blk_mq_sysfs_unregister(q);
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2035
  	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
320ae51fe   Jens Axboe   blk-mq: new multi...
2036
2037
2038
2039
2040
2041
  
  	/*
  	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
  	 * we should change hctx numa_node according to new topology (this
	 * involves freeing and re-allocating memory; worth doing?)
  	 */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2042
  	blk_mq_map_swqueue(q, online_mask);
320ae51fe   Jens Axboe   blk-mq: new multi...
2043

67aec14ce   Jens Axboe   blk-mq: make the ...
2044
  	blk_mq_sysfs_register(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
2045
  }
f618ef7c4   Paul Gortmaker   blk-mq: remove ne...
2046
2047
  static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
  				      unsigned long action, void *hcpu)
320ae51fe   Jens Axboe   blk-mq: new multi...
2048
2049
  {
  	struct request_queue *q;
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2050
2051
2052
2053
2054
2055
2056
  	int cpu = (unsigned long)hcpu;
  	/*
  	 * New online cpumask which is going to be set in this hotplug event.
	 * Declare this cpumask statically, since cpu-hotplug operations are
	 * invoked one by one and dynamically allocating it could fail.
  	 */
  	static struct cpumask online_new;
320ae51fe   Jens Axboe   blk-mq: new multi...
2057
2058
  
  	/*
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
	 * Before a hotadded cpu starts handling requests, new mappings must
	 * be established.  Otherwise, requests queued on the new cpu might
	 * never be dispatched.
  	 *
  	 * For example, there is a single hw queue (hctx) and two CPU queues
  	 * (ctx0 for CPU0, and ctx1 for CPU1).
  	 *
  	 * Now CPU1 is just onlined and a request is inserted into
  	 * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
  	 * still zero.
  	 *
  	 * And then while running hw queue, flush_busy_ctxs() finds bit0 is
  	 * set in pending bitmap and tries to retrieve requests in
	 * hctx->ctxs[0]->rq_list.  But hctx->ctxs[0] is a pointer to ctx0,
  	 * so the request in ctx1->rq_list is ignored.
320ae51fe   Jens Axboe   blk-mq: new multi...
2074
  	 */
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
  	switch (action & ~CPU_TASKS_FROZEN) {
  	case CPU_DEAD:
  	case CPU_UP_CANCELED:
  		cpumask_copy(&online_new, cpu_online_mask);
  		break;
  	case CPU_UP_PREPARE:
  		cpumask_copy(&online_new, cpu_online_mask);
  		cpumask_set_cpu(cpu, &online_new);
  		break;
  	default:
320ae51fe   Jens Axboe   blk-mq: new multi...
2085
  		return NOTIFY_OK;
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2086
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2087
2088
  
  	mutex_lock(&all_q_mutex);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
  
  	/*
  	 * We need to freeze and reinit all existing queues.  Freezing
	 * involves a synchronous wait for an RCU grace period, and doing it
  	 * one by one may take a long time.  Start freezing all queues in
  	 * one swoop and then wait for the completions so that freezing can
  	 * take place in parallel.
  	 */
  	list_for_each_entry(q, &all_q_list, all_q_node)
  		blk_mq_freeze_queue_start(q);
f054b56c9   Ming Lei   blk-mq: fix race ...
2099
  	list_for_each_entry(q, &all_q_list, all_q_node) {
f3af020b9   Tejun Heo   blk-mq: make mq_q...
2100
  		blk_mq_freeze_queue_wait(q);
f054b56c9   Ming Lei   blk-mq: fix race ...
2101
2102
2103
2104
2105
2106
  		/*
  		 * timeout handler can't touch hw queue during the
  		 * reinitialization
  		 */
  		del_timer_sync(&q->timeout);
  	}
320ae51fe   Jens Axboe   blk-mq: new multi...
2107
  	list_for_each_entry(q, &all_q_list, all_q_node)
5778322e6   Akinobu Mita   blk-mq: avoid ins...
2108
  		blk_mq_queue_reinit(q, &online_new);
f3af020b9   Tejun Heo   blk-mq: make mq_q...
2109
2110
2111
  
  	list_for_each_entry(q, &all_q_list, all_q_node)
  		blk_mq_unfreeze_queue(q);
320ae51fe   Jens Axboe   blk-mq: new multi...
2112
2113
2114
  	mutex_unlock(&all_q_mutex);
  	return NOTIFY_OK;
  }
a51644054   Jens Axboe   blk-mq: scale dep...
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
  static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	int i;
  
  	for (i = 0; i < set->nr_hw_queues; i++) {
  		set->tags[i] = blk_mq_init_rq_map(set, i);
  		if (!set->tags[i])
  			goto out_unwind;
  	}
  
  	return 0;
  
  out_unwind:
  	while (--i >= 0)
  		blk_mq_free_rq_map(set, set->tags[i], i);
a51644054   Jens Axboe   blk-mq: scale dep...
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
  	return -ENOMEM;
  }
  
  /*
   * Allocate the request maps associated with this tag_set. Note that this
   * may reduce the depth asked for, if memory is tight. set->queue_depth
   * will be updated to reflect the allocated depth.
   */
  static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
  {
  	unsigned int depth;
  	int err;
  
  	depth = set->queue_depth;
  	do {
  		err = __blk_mq_alloc_rq_maps(set);
  		if (!err)
  			break;
  
  		set->queue_depth >>= 1;
  		if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) {
  			err = -ENOMEM;
  			break;
  		}
  	} while (set->queue_depth);
  
  	if (!set->queue_depth || err) {
  		pr_err("blk-mq: failed to allocate request map
  ");
  		return -ENOMEM;
  	}
  
  	if (depth != set->queue_depth)
  		pr_info("blk-mq: reduced tag depth (%u -> %u)
  ",
  						depth, set->queue_depth);
  
  	return 0;
  }
f26cdc853   Keith Busch   blk-mq: Shared ta...
2169
2170
2171
2172
2173
  struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
  {
  	return tags->cpumask;
  }
  EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
a4391c646   Jens Axboe   blk-mq: bump max ...
2174
2175
2176
2177
2178
2179
  /*
   * Alloc a tag set to be associated with one or more request queues.
   * May fail with EINVAL for various error conditions. May adjust the
 * requested depth down if it is too large. In that case, the set
   * value will be stored in set->queue_depth.
   */
24d2f9030   Christoph Hellwig   blk-mq: split out...
2180
2181
  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
  {
205fb5f5b   Bart Van Assche   blk-mq: add blk_m...
2182
  	BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2183
2184
  	if (!set->nr_hw_queues)
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2185
  	if (!set->queue_depth)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2186
2187
2188
  		return -EINVAL;
  	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
  		return -EINVAL;
f9018ac93   Xiaoguang Wang   block: remove red...
2189
  	if (!set->ops->queue_rq || !set->ops->map_queue)
24d2f9030   Christoph Hellwig   blk-mq: split out...
2190
  		return -EINVAL;
a4391c646   Jens Axboe   blk-mq: bump max ...
2191
2192
2193
2194
2195
2196
  	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
  		pr_info("blk-mq: reduced tag depth to %u
  ",
  			BLK_MQ_MAX_DEPTH);
  		set->queue_depth = BLK_MQ_MAX_DEPTH;
  	}
24d2f9030   Christoph Hellwig   blk-mq: split out...
2197

6637fadf2   Shaohua Li   blk-mq: move the ...
2198
2199
2200
2201
2202
2203
2204
2205
2206
  	/*
  	 * If a crashdump is active, then we are potentially in a very
	 * memory-constrained environment. Limit us to 1 queue and
  	 * 64 tags to prevent using too much memory.
  	 */
  	if (is_kdump_kernel()) {
  		set->nr_hw_queues = 1;
  		set->queue_depth = min(64U, set->queue_depth);
  	}
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2207
2208
2209
2210
2211
  	/*
  	 * There is no use for more h/w queues than cpus.
  	 */
  	if (set->nr_hw_queues > nr_cpu_ids)
  		set->nr_hw_queues = nr_cpu_ids;
6637fadf2   Shaohua Li   blk-mq: move the ...
2212

868f2f0b7   Keith Busch   blk-mq: dynamic h...
2213
  	set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
24d2f9030   Christoph Hellwig   blk-mq: split out...
2214
2215
  				 GFP_KERNEL, set->numa_node);
  	if (!set->tags)
a51644054   Jens Axboe   blk-mq: scale dep...
2216
  		return -ENOMEM;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2217

a51644054   Jens Axboe   blk-mq: scale dep...
2218
2219
  	if (blk_mq_alloc_rq_maps(set))
  		goto enomem;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2220

0d2602ca3   Jens Axboe   blk-mq: improve s...
2221
2222
  	mutex_init(&set->tag_list_lock);
  	INIT_LIST_HEAD(&set->tag_list);
24d2f9030   Christoph Hellwig   blk-mq: split out...
2223
  	return 0;
a51644054   Jens Axboe   blk-mq: scale dep...
2224
  enomem:
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2225
2226
  	kfree(set->tags);
  	set->tags = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2227
2228
2229
2230
2231
2232
2233
  	return -ENOMEM;
  }
  EXPORT_SYMBOL(blk_mq_alloc_tag_set);
  
  void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
  {
  	int i;
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2234
  	for (i = 0; i < nr_cpu_ids; i++) {
f42d79ab6   Junichi Nomura   blk-mq: fix use-a...
2235
  		if (set->tags[i])
484b4061e   Jens Axboe   blk-mq: save memo...
2236
2237
  			blk_mq_free_rq_map(set, set->tags[i], i);
  	}
981bd189f   Ming Lei   blk-mq: fix leak ...
2238
  	kfree(set->tags);
5676e7b6d   Robert Elliott   blk-mq: cleanup a...
2239
  	set->tags = NULL;
24d2f9030   Christoph Hellwig   blk-mq: split out...
2240
2241
  }
  EXPORT_SYMBOL(blk_mq_free_tag_set);
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
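/*
 * Change the queue depth of a live queue. The new depth may not exceed the
 * depth the tag set was created with; the tag map of each hardware queue
 * is resized in turn.
 */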
  int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  {
  	struct blk_mq_tag_set *set = q->tag_set;
  	struct blk_mq_hw_ctx *hctx;
  	int i, ret;
  
  	if (!set || nr > set->queue_depth)
  		return -EINVAL;
  
  	ret = 0;
  	queue_for_each_hw_ctx(q, hctx, i) {
e9137d4b9   Keith Busch   blk-mq: Fix NULL ...
2253
2254
  		if (!hctx->tags)
  			continue;
e3a2b3f93   Jens Axboe   blk-mq: allow cha...
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
  		ret = blk_mq_tag_update_depth(hctx->tags, nr);
  		if (ret)
  			break;
  	}
  
  	if (!ret)
  		q->nr_requests = nr;
  
  	return ret;
  }
868f2f0b7   Keith Busch   blk-mq: dynamic h...
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
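/*
 * Change the number of hardware queues for every queue sharing this tag
 * set: freeze all queues, reallocate the hardware contexts, switch between
 * the single- and multi-queue make_request handlers as needed, redo the
 * queue mapping and unfreeze.
 */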
  void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
  {
  	struct request_queue *q;
  
  	if (nr_hw_queues > nr_cpu_ids)
  		nr_hw_queues = nr_cpu_ids;
  	if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
  		return;
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list)
  		blk_mq_freeze_queue(q);
  
  	set->nr_hw_queues = nr_hw_queues;
  	list_for_each_entry(q, &set->tag_list, tag_set_list) {
  		blk_mq_realloc_hw_ctxs(set, q);
  
  		if (q->nr_hw_queues > 1)
  			blk_queue_make_request(q, blk_mq_make_request);
  		else
  			blk_queue_make_request(q, blk_sq_make_request);
  
  		blk_mq_queue_reinit(q, cpu_online_mask);
  	}
  
  	list_for_each_entry(q, &set->tag_list, tag_set_list)
  		blk_mq_unfreeze_queue(q);
  }
  EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
676141e48   Jens Axboe   blk-mq: don't dum...
2293
2294
2295
2296
2297
2298
2299
2300
2301
  void blk_mq_disable_hotplug(void)
  {
  	mutex_lock(&all_q_mutex);
  }
  
  void blk_mq_enable_hotplug(void)
  {
  	mutex_unlock(&all_q_mutex);
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
2302
2303
  static int __init blk_mq_init(void)
  {
320ae51fe   Jens Axboe   blk-mq: new multi...
2304
  	blk_mq_cpu_init();
add703fda   Tejun Heo   blk-mq: use percp...
2305
  	hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
320ae51fe   Jens Axboe   blk-mq: new multi...
2306
2307
2308
2309
  
  	return 0;
  }
  subsys_initcall(blk_mq_init);