block/blk-core.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 *	-  July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-rq-qos.h"

#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
#endif

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);

DEFINE_IDA(blk_queue_ida);

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_set);

/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	clear_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_clear);

/**
 * blk_queue_flag_test_and_set - atomically test and set a queue flag
 * @flag: flag to be set
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was already set.
 */
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
{
	return test_and_set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
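
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * drivers use these helpers instead of open-coding atomic bitops on
 * q->queue_flags.  The flags below are merely assumed examples:
 *
 *	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 *	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
 *	if (blk_queue_flag_test_and_set(QUEUE_FLAG_REGISTERED, q))
 *		pr_warn("queue was already registered\n");
 */
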
void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));
	INIT_LIST_HEAD(&rq->queuelist);
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->tag = -1;
	rq->internal_tag = -1;
	rq->start_time_ns = ktime_get_ns();
	rq->part = NULL;
	refcount_set(&rq->ref, 1);
}
EXPORT_SYMBOL(blk_rq_init);

#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
	REQ_OP_NAME(READ),
	REQ_OP_NAME(WRITE),
	REQ_OP_NAME(FLUSH),
	REQ_OP_NAME(DISCARD),
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
	REQ_OP_NAME(SCSI_OUT),
	REQ_OP_NAME(DRV_IN),
	REQ_OP_NAME(DRV_OUT),
};
#undef REQ_OP_NAME

/**
 * blk_op_str - Return the string XXX matching a REQ_OP_XXX value.
 * @op: REQ_OP_XXX.
 *
 * Description: Centralized block layer helper to convert REQ_OP_XXX into
 * string format. Useful when debugging or tracing a bio or request. For an
 * invalid REQ_OP_XXX it returns the string "UNKNOWN".
 */
inline const char *blk_op_str(unsigned int op)
{
	const char *op_str = "UNKNOWN";

	if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op])
		op_str = blk_op_name[op];

	return op_str;
}
EXPORT_SYMBOL_GPL(blk_op_str);
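
/*
 * Illustrative sketch (editor's addition): blk_op_str() is meant for debug
 * and trace output, e.g.:
 *
 *	pr_debug("op=%s sector=%llu\n", blk_op_str(req_op(req)),
 *		 (unsigned long long)blk_rq_pos(req));
 */
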
static const struct {
	int		errno;
	const char	*name;
} blk_errors[] = {
	[BLK_STS_OK]		= { 0,		"" },
	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};

blk_status_t errno_to_blk_status(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
		if (blk_errors[i].errno == errno)
			return (__force blk_status_t)i;
	}

	return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(errno_to_blk_status);

int blk_status_to_errno(blk_status_t status)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return -EIO;
	return blk_errors[idx].errno;
}
EXPORT_SYMBOL_GPL(blk_status_to_errno);
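
/*
 * Worked example (editor's addition): the two converters round-trip
 * through blk_errors[].  errno_to_blk_status(-ENOSPC) yields BLK_STS_NOSPC,
 * and blk_status_to_errno(BLK_STS_NOSPC) yields -ENOSPC again; any errno
 * without a table entry collapses to BLK_STS_IOERR, and any out-of-range
 * status decodes to -EIO.
 */
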
static void print_req_error(struct request *req, blk_status_t status,
		const char *caller)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return;

	printk_ratelimited(KERN_ERR
		"%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x "
		"phys_seg %u prio class %u\n",
		caller, blk_errors[idx].name,
		req->rq_disk ? req->rq_disk->disk_name : "?",
		blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
		req->cmd_flags & ~REQ_OP_MASK,
		req->nr_phys_segments,
		IOPRIO_PRIO_CLASS(req->ioprio));
}
static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
{
	if (error)
		bio->bi_status = error;

	if (unlikely(rq->rq_flags & RQF_QUIET))
		bio_set_flag(bio, BIO_QUIET);

	bio_advance(bio, nbytes);

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
		bio_endio(bio);
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?",
		(unsigned long long) rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
	       rq->bio, rq->biotail, blk_rq_bytes(rq));
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with queue lock initialized.
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->timeout_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * blk_set_pm_only - increment pm_only counter
 * @q: request queue pointer
 */
void blk_set_pm_only(struct request_queue *q)
{
	atomic_inc(&q->pm_only);
}
EXPORT_SYMBOL_GPL(blk_set_pm_only);

void blk_clear_pm_only(struct request_queue *q)
{
	int pm_only;

	pm_only = atomic_dec_return(&q->pm_only);
	WARN_ON_ONCE(pm_only < 0);
	if (pm_only == 0)
		wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
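
/*
 * Illustrative sketch (editor's addition): a power-management path, such
 * as a device quiesce, brackets the suspended period so that only
 * BLK_MQ_REQ_PREEMPT requests pass blk_queue_enter():
 *
 *	blk_set_pm_only(q);
 *	... queue accepts only preempt requests while suspended ...
 *	blk_clear_pm_only(q);
 */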

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);

void blk_set_queue_dying(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);

	/*
	 * When queue DYING flag is set, we need to block new req
	 * entering queue, so we call blk_freeze_queue_start() to
	 * prevent I/O from crossing blk_queue_enter().
	 */
	blk_freeze_queue_start(q);

	if (queue_is_mq(q))
		blk_mq_wake_waiters(q);

	/* Make blk_queue_enter() reexamine the DYING flag. */
	wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);

/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 */
void blk_cleanup_queue(struct request_queue *q)
{
	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
	 * prevent blk_mq_run_hw_queues() from accessing the hardware queues
	 * after draining finished.
	 */
	blk_freeze_queue(q);

	rq_qos_exit(q);
	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);

	/* for synchronous bio-based driver finish in-flight integrity i/o */
	blk_flush_integrity();

	/* @q won't process any more requests, flush async actions */
	del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
	blk_sync_queue(q);
	if (queue_is_mq(q))
		blk_mq_exit_queue(q);

	/*
	 * In theory, request pool of sched_tags belongs to request queue.
	 * However, the current implementation requires tag_set for freeing
	 * requests, so free the pool now.
	 *
	 * Queue has become frozen, there can't be any in-queue requests, so
	 * it is safe to free requests now.
	 */
	mutex_lock(&q->sysfs_lock);
	if (q->elevator)
		blk_mq_sched_free_requests(q);
	mutex_unlock(&q->sysfs_lock);

	percpu_ref_exit(&q->q_usage_counter);

	/* @q is and will stay empty, shutdown and put */
	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);

/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
 */
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
	const bool pm = flags & BLK_MQ_REQ_PREEMPT;

	while (true) {
		bool success = false;

		rcu_read_lock();
		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
			/*
			 * The code that increments the pm_only counter is
			 * responsible for ensuring that that counter is
			 * globally visible before the queue is unfrozen.
			 */
			if (pm || !blk_queue_pm_only(q)) {
				success = true;
			} else {
				percpu_ref_put(&q->q_usage_counter);
			}
		}
		rcu_read_unlock();

		if (success)
			return 0;
		if (flags & BLK_MQ_REQ_NOWAIT)
			return -EBUSY;

		/*
		 * This is the read-side pair of the barrier in
		 * blk_freeze_queue_start(); we need to order reading the
		 * __PERCPU_REF_DEAD flag of .q_usage_counter against reading
		 * .mq_freeze_depth or the queue dying flag, otherwise the
		 * following wait may never return if the two reads are
		 * reordered.
		 */
		smp_rmb();
		wait_event(q->mq_freeze_wq,
			   (!q->mq_freeze_depth &&
			    (pm || (blk_pm_request_resume(q),
				    !blk_queue_pm_only(q)))) ||
			   blk_queue_dying(q));
		if (blk_queue_dying(q))
			return -ENODEV;
	}
}

void blk_queue_exit(struct request_queue *q)
{
	percpu_ref_put(&q->q_usage_counter);
}

static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
	struct request_queue *q =
		container_of(ref, struct request_queue, q_usage_counter);

	wake_up_all(&q->mq_freeze_wq);
}
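
/*
 * Illustrative sketch (editor's addition): submitters bracket queue access
 * with enter/exit so that a freeze can drain them:
 *
 *	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT) == 0) {
 *		... submit work against q ...
 *		blk_queue_exit(q);
 *	}
 *
 * With BLK_MQ_REQ_NOWAIT the call fails with -EBUSY instead of sleeping
 * on a frozen queue; a dying queue yields -ENODEV.
 */
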
static void blk_rq_timed_out_timer(struct timer_list *t)
{
	struct request_queue *q = from_timer(q, t, timeout);

	kblockd_schedule_work(&q->timeout_work);
}

static void blk_timeout_work(struct work_struct *work)
{
}

/**
 * blk_alloc_queue_node - allocate a request queue
 * @gfp_mask: memory allocation flags
 * @node_id: NUMA node to allocate memory from
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int ret;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->last_merge = NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
	if (q->id < 0)
		goto fail_q;

	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto fail_id;

	q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
	if (!q->backing_dev_info)
		goto fail_split;

	q->stats = blk_alloc_queue_stats();
	if (!q->stats)
		goto fail_stats;

	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->io_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->backing_dev_info->name = "block";
	q->node = node_id;

	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
		    laptop_mode_timer_fn, 0);
	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
	INIT_WORK(&q->timeout_work, blk_timeout_work);
	INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
	INIT_LIST_HEAD(&q->blkg_list);
#endif

	kobject_init(&q->kobj, &blk_queue_ktype);

#ifdef CONFIG_BLK_DEV_IO_TRACE
	mutex_init(&q->blk_trace_mutex);
#endif
	mutex_init(&q->sysfs_lock);
	mutex_init(&q->sysfs_dir_lock);
	spin_lock_init(&q->queue_lock);

	init_waitqueue_head(&q->mq_freeze_wq);
	mutex_init(&q->mq_freeze_lock);

	/*
	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
	 * See blk_register_queue() for details.
	 */
	if (percpu_ref_init(&q->q_usage_counter,
				blk_queue_usage_counter_release,
				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
		goto fail_bdi;

	if (blkcg_init_queue(q))
		goto fail_ref;

	return q;

fail_ref:
	percpu_ref_exit(&q->q_usage_counter);
fail_bdi:
	blk_free_queue_stats(q->stats);
fail_stats:
	bdi_put(q->backing_dev_info);
fail_split:
	bioset_exit(&q->bio_split);
fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

bool blk_get_queue(struct request_queue *q)
{
	if (likely(!blk_queue_dying(q))) {
		__blk_get_queue(q);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(blk_get_queue);

/**
 * blk_get_request - allocate a request
 * @q: request queue to allocate a request for
 * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
 * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
 */
struct request *blk_get_request(struct request_queue *q, unsigned int op,
				blk_mq_req_flags_t flags)
{
	struct request *req;

	WARN_ON_ONCE(op & REQ_NOWAIT);
	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));

	req = blk_mq_alloc_request(q, op, flags);
	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
		q->mq_ops->initialize_rq_fn(req);

	return req;
}
EXPORT_SYMBOL(blk_get_request);

void blk_put_request(struct request *req)
{
	blk_mq_free_request(req);
}
EXPORT_SYMBOL(blk_put_request);
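
/*
 * Illustrative sketch (editor's addition, error handling trimmed): a
 * passthrough user allocates, executes and frees a request like this;
 * blk_execute_rq() is assumed from elsewhere in the block layer:
 *
 *	struct request *rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 *
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	... set up the request payload ...
 *	blk_execute_rq(q, NULL, rq, 0);
 *	blk_put_request(rq);
 */
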
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_backmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	blk_account_io_start(req, false);
	return true;
}

bool bio_attempt_front_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_frontmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;
	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	blk_account_io_start(req, false);
	return true;
}

bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_start(req, false);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL)
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added on the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **same_queue_rq)
{
	struct blk_plug *plug;
	struct request *rq;
	struct list_head *plug_list;

	plug = blk_mq_plug(q, bio);
	if (!plug)
		return false;

	plug_list = &plug->mq_list;

	list_for_each_entry_reverse(rq, plug_list, queuelist) {
		bool merged = false;

		if (rq->q == q && same_queue_rq) {
			/*
			 * Only the blk-mq multiple hardware queues case checks
			 * for an rq in the same queue; there should be only one
			 * such rq in a queue.
			 */
			*same_queue_rq = rq;
		}

		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			merged = bio_attempt_back_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_FRONT_MERGE:
			merged = bio_attempt_front_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			break;
		}

		if (merged)
			return true;
	}

	return false;
}

static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "attempt to access beyond end of device\n");
	printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
			bio_devname(bio, b), bio->bi_opf,
			(unsigned long long)bio_end_sector(bio),
			(long long)maxsector);
}

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
{
	return part->make_it_fail && should_fail(&fail_make_request, bytes);
}

static int __init fail_make_request_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
						NULL, &fail_make_request);

	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

static inline bool should_fail_request(struct hd_struct *part,
					unsigned int bytes)
{
	return false;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */
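
/*
 * Usage note (editor's addition): as with other fault-injection points,
 * fail_make_request takes the standard attribute tuple on the kernel
 * command line,
 *
 *	fail_make_request=<interval>,<probability>,<space>,<times>
 *
 * or can be tuned via the debugfs entries created above, combined with the
 * per-partition make-it-fail switch consulted in should_fail_request().
 */
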
static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
	const int op = bio_op(bio);

	if (part->policy && op_is_write(op)) {
		char b[BDEVNAME_SIZE];

		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			return false;

		WARN_ONCE(1,
		       "generic_make_request: Trying to write "
			"to read-only block-device %s (partno %d)\n",
			bio_devname(bio, b), part->partno);
		/* Older lvm-tools actually trigger this */
		return false;
	}

	return false;
}

static noinline int should_fail_bio(struct bio *bio)
{
	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
		return -EIO;
	return 0;
}
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);

/*
 * Check whether this bio extends beyond the end of the device or partition.
 * This may well happen - the kernel calls bread() without checking the size of
 * the device, e.g., when mounting a file system.
 */
static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
{
	unsigned int nr_sectors = bio_sectors(bio);

	if (nr_sectors && maxsector &&
	    (nr_sectors > maxsector ||
	     bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
		handle_bad_sector(bio, maxsector);
		return -EIO;
	}
	return 0;
}
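
/*
 * Worked example (editor's addition): for maxsector == 1000, a bio with
 * bi_sector == 996 and nr_sectors == 8 is rejected because 996 > 1000 - 8.
 * Writing the test as "start > max - len" instead of "start + len > max"
 * avoids sector_t overflow for pathologically large bi_sector values.
 */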
  
/*
 * Remap block n of partition p to block n+start(p) of the disk.
 */
static inline int blk_partition_remap(struct bio *bio)
{
	struct hd_struct *p;
	int ret = -EIO;

	rcu_read_lock();
	p = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (unlikely(!p))
		goto out;
	if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
		goto out;
	if (unlikely(bio_check_ro(bio, p)))
		goto out;

	/*
	 * Zone reset does not include bi_size so bio_sectors() is always 0.
	 * Include a test for the reset op code and perform the remap if needed.
	 */
	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
		trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
				      bio->bi_iter.bi_sector - p->start_sect);
	}
	bio->bi_partno = 0;
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
	struct request_queue *q;
	int nr_sectors = bio_sectors(bio);
	blk_status_t status = BLK_STS_IOERR;
	char b[BDEVNAME_SIZE];

	might_sleep();

	q = bio->bi_disk->queue;
	if (unlikely(!q)) {
		printk(KERN_ERR
		       "generic_make_request: Trying to access "
			"nonexistent block-device %s (%Lu)\n",
			bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
		goto end_io;
	}

	/*
	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
	 * if queue is not a request based queue.
	 */
	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
		goto not_supported;

	if (should_fail_bio(bio))
		goto end_io;

	if (bio->bi_partno) {
		if (unlikely(blk_partition_remap(bio)))
			goto end_io;
	} else {
		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
			goto end_io;
		if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
			goto end_io;
	}

	/*
	 * Filter flush bios early so that make_request based
	 * drivers without flush support don't have to worry
	 * about them.
	 */
	if (op_is_flush(bio->bi_opf) &&
	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
		if (!nr_sectors) {
			status = BLK_STS_OK;
			goto end_io;
		}
	}

	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		bio->bi_opf &= ~REQ_HIPRI;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
		if (!blk_queue_discard(q))
			goto not_supported;
		break;
	case REQ_OP_SECURE_ERASE:
		if (!blk_queue_secure_erase(q))
			goto not_supported;
		break;
	case REQ_OP_WRITE_SAME:
		if (!q->limits.max_write_same_sectors)
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET:
		if (!blk_queue_is_zoned(q))
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET_ALL:
		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
			goto not_supported;
		break;
	case REQ_OP_WRITE_ZEROES:
		if (!q->limits.max_write_zeroes_sectors)
			goto not_supported;
		break;
	default:
		break;
	}

	/*
	 * Various block parts want %current->io_context and lazy ioc
	 * allocation ends up trading a lot of pain for a small amount of
	 * memory.  Just allocate it upfront.  This may fail and block
	 * layer knows how to live with it.
	 */
	create_io_context(GFP_ATOMIC, q->node);

	if (!blkcg_bio_issue_check(q, bio))
		return false;

	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
		trace_block_bio_queue(q, bio);
		/* Now that enqueuing has been traced, we need to trace
		 * completion as well.
		 */
		bio_set_flag(bio, BIO_TRACE_COMPLETION);
	}
	return true;

not_supported:
	status = BLK_STS_NOTSUPP;
end_io:
	bio->bi_status = status;
	bio_endio(bio);
	return false;
}
27a84d54c   Christoph Hellwig   block: refactor g...
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
  /**
   * generic_make_request - hand a buffer to its device driver for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
   * generic_make_request() is used to make I/O requests of block
   * devices. It is passed a &struct bio, which describes the I/O that needs
   * to be done.
   *
   * generic_make_request() does not return any status.  The
   * success/failure status of the request, along with notification of
   * completion, is delivered asynchronously through the bio->bi_end_io
   * function described (one day) else where.
   *
   * The caller of generic_make_request must make sure that bi_io_vec
   * are set to describe the memory buffer, and that bi_dev and bi_sector are
   * set to describe the device address, and the
   * bi_end_io and optionally bi_private are set to describe how
   * completion notification should be signaled.
   *
   * generic_make_request and the drivers it calls may use bi_next if this
   * bio happens to be merged with someone else, and may resubmit the bio to
   * a lower device by calling into generic_make_request recursively, which
   * means the bio should NOT be touched after the call to ->make_request_fn.
d89d87965   Neil Brown   When stacked bloc...
956
   */
dece16353   Jens Axboe   block: change ->m...
957
  blk_qc_t generic_make_request(struct bio *bio)
d89d87965   Neil Brown   When stacked bloc...
958
  {
f5fe1b519   NeilBrown   blk: Ensure users...
959
960
961
962
963
964
965
966
  	/*
  	 * bio_list_on_stack[0] contains bios submitted by the current
  	 * make_request_fn.
  	 * bio_list_on_stack[1] contains bios that were submitted before
  	 * the current make_request_fn, but that haven't been processed
  	 * yet.
  	 */
  	struct bio_list bio_list_on_stack[2];
dece16353   Jens Axboe   block: change ->m...
967
  	blk_qc_t ret = BLK_QC_T_NONE;
bddd87c7e   Akinobu Mita   blk-core: use BIO...
968

27a84d54c   Christoph Hellwig   block: refactor g...
969
  	if (!generic_make_request_checks(bio))
dece16353   Jens Axboe   block: change ->m...
970
  		goto out;
27a84d54c   Christoph Hellwig   block: refactor g...
971
972
973
974
975
976
977
978
979
980
981
  
	/*
	 * We only want one ->make_request_fn to be active at a time, else
	 * stack usage with stacked devices could be a problem.  So use
	 * current->bio_list to keep a list of requests submitted by a
	 * make_request_fn function.  current->bio_list is also used as a
	 * flag to say if generic_make_request is currently active in this
	 * task or not.  If it is NULL, then no make_request is active.  If
	 * it is non-NULL, then a make_request is active, and new requests
	 * should be added at the tail.
	 */
	if (current->bio_list) {
		bio_list_add(&current->bio_list[0], bio);
		goto out;
	}

	/* The following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to a pointer to the bio_list_on_stack,
	 * thus initialising the bio_list of new bios to be
	 * added.  ->make_request() may indeed add some more bios
	 * through a recursive call to generic_make_request.  If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top.  In this case we really did just take the bio
	 * off the top of the list (no pretending) and so remove it from
	 * bio_list, and call into ->make_request() again.
	 */
  	BUG_ON(bio->bi_next);
	bio_list_init(&bio_list_on_stack[0]);
	current->bio_list = bio_list_on_stack;
  	do {
		struct request_queue *q = bio->bi_disk->queue;
		blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
			BLK_MQ_REQ_NOWAIT : 0;

		if (likely(blk_queue_enter(q, flags) == 0)) {
			struct bio_list lower, same;

			/* Create a fresh bio_list for all subordinate requests */
			bio_list_on_stack[1] = bio_list_on_stack[0];
			bio_list_init(&bio_list_on_stack[0]);
			ret = q->make_request_fn(q, bio);

			blk_queue_exit(q);

			/* sort new bios into those for a lower level
			 * and those for the same level
			 */
			bio_list_init(&lower);
			bio_list_init(&same);
			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
				if (q == bio->bi_disk->queue)
					bio_list_add(&same, bio);
				else
					bio_list_add(&lower, bio);
			/* now assemble so we handle the lowest level first */
			bio_list_merge(&bio_list_on_stack[0], &lower);
			bio_list_merge(&bio_list_on_stack[0], &same);
			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
		} else {
			if (unlikely(!blk_queue_dying(q) &&
					(bio->bi_opf & REQ_NOWAIT)))
				bio_wouldblock_error(bio);
			else
				bio_io_error(bio);
		}
		bio = bio_list_pop(&bio_list_on_stack[0]);
	} while (bio);
	current->bio_list = NULL; /* deactivate */

out:
	return ret;
}
  EXPORT_SYMBOL(generic_make_request);
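
/*
 * Example (illustrative sketch, not taken from this file): a stacking
 * driver's make_request_fn might remap a bio and resubmit it to a lower
 * device roughly like this; my_lower_bdev() and my_map_sector() are
 * hypothetical helpers:
 *
 *	static blk_qc_t my_stacked_make_request(struct request_queue *q,
 *						struct bio *bio)
 *	{
 *		bio_set_dev(bio, my_lower_bdev(bio));
 *		bio->bi_iter.bi_sector = my_map_sector(bio);
 *		return generic_make_request(bio);
 *	}
 *
 * The recursive call is safe: it only appends to current->bio_list, and the
 * loop above processes it once the current ->make_request_fn returns.
 */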
  
  /**
   * direct_make_request - hand a buffer directly to its device driver for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
   * This function behaves like generic_make_request(), but does not protect
   * against recursion.  Must only be used if the called driver is known
   * to not call generic_make_request (or direct_make_request) again from
   * its make_request function.  (Calling direct_make_request again from
   * a workqueue is perfectly fine as that doesn't recurse).
   */
  blk_qc_t direct_make_request(struct bio *bio)
  {
  	struct request_queue *q = bio->bi_disk->queue;
  	bool nowait = bio->bi_opf & REQ_NOWAIT;
  	blk_qc_t ret;
  
  	if (!generic_make_request_checks(bio))
  		return BLK_QC_T_NONE;
  	if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
  		if (nowait && !blk_queue_dying(q))
  			bio->bi_status = BLK_STS_AGAIN;
  		else
  			bio->bi_status = BLK_STS_IOERR;
  		bio_endio(bio);
  		return BLK_QC_T_NONE;
  	}
  
  	ret = q->make_request_fn(q, bio);
  	blk_queue_exit(q);
  	return ret;
  }
  EXPORT_SYMBOL_GPL(direct_make_request);
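
/*
 * Example (illustrative sketch): a bio-based multipath driver whose
 * make_request function never recurses back into generic_make_request()
 * can hand a remapped bio straight to the chosen path's queue; "path_bdev"
 * is a hypothetical lower device:
 *
 *	bio_set_dev(bio, path_bdev);
 *	ret = direct_make_request(bio);
 */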
  
  /**
   * submit_bio - submit a bio to the block device layer for I/O
   * @bio: The &struct bio which describes the I/O
   *
   * submit_bio() is very similar in purpose to generic_make_request(), and
   * uses that function to do most of the work. Both are fairly rough
 * interfaces; @bio must be set up and ready for I/O.
   *
   */
blk_qc_t submit_bio(struct bio *bio)
{
	bool workingset_read = false;
	unsigned long pflags;
	blk_qc_t ret;

	if (blkcg_punt_bio_submit(bio))
		return BLK_QC_T_NONE;

	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
			count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
		else
			count = bio_sectors(bio);

		if (op_is_write(bio_op(bio))) {
			count_vm_events(PGPGOUT, count);
		} else {
			if (bio_flagged(bio, BIO_WORKINGSET))
				workingset_read = true;
			task_io_account_read(bio->bi_iter.bi_size);
			count_vm_events(PGPGIN, count);
		}

		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];

			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
			current->comm, task_pid_nr(current),
				op_is_write(bio_op(bio)) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_iter.bi_sector,
				bio_devname(bio, b), count);
		}
	}
  	/*
  	 * If we're reading data that is part of the userspace
  	 * workingset, count submission time as memory stall. When the
  	 * device is congested, or the submitting cgroup IO-throttled,
  	 * submission can be a significant part of overall IO time.
  	 */
  	if (workingset_read)
  		psi_memstall_enter(&pflags);
  
  	ret = generic_make_request(bio);
  
  	if (workingset_read)
  		psi_memstall_leave(&pflags);
  
  	return ret;
}
  EXPORT_SYMBOL(submit_bio);
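
/*
 * Example (illustrative sketch): a filesystem-style caller might set up and
 * submit a one-page read roughly as follows; "bdev", "sector", "page" and
 * my_end_io() are assumed to be provided by the caller:
 *
 *	struct bio *bio = bio_alloc(GFP_NOIO, 1);
 *
 *	bio_set_dev(bio, bdev);
 *	bio->bi_iter.bi_sector = sector;
 *	bio->bi_opf = REQ_OP_READ;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	bio->bi_end_io = my_end_io;
 *	submit_bio(bio);
 *
 * Completion is signaled asynchronously through my_end_io().
 */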

/**
 * blk_cloned_rq_check_limits - Helper function to check a cloned request
 *                              against the new queue limits
   * @q:  the queue
   * @rq: the request being checked
   *
   * Description:
 *    @rq may have been built based on the weaker limits of upper-level queues
 *    in request stacking drivers, and it may violate the limits of @q.
 *    Since the block layer and the underlying device driver trust @rq
 *    after it is inserted into @q, it should be checked against @q before
 *    the insertion using this generic function.
   *
 *    Request stacking drivers like request-based dm may change the queue
 *    limits when retrying requests on other queues. Those requests need
 *    to be checked against the new queue limits again during dispatch.
 */
static int blk_cloned_rq_check_limits(struct request_queue *q,
				      struct request *rq)
{
	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
  		printk(KERN_ERR "%s: over max size limit. (%u > %u)
  ",
  			__func__, blk_rq_sectors(rq),
  			blk_queue_get_max_sectors(q, req_op(rq)));
		return -EIO;
	}

	/*
	 * The queue's settings related to segment counting, like
	 * q->bounce_pfn, may differ from those of other stacking queues.
	 * Recalculate it to check the request correctly against this
	 * queue's limits.
	 */
	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
	if (rq->nr_phys_segments > queue_max_segments(q)) {
  		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)
  ",
  			__func__, rq->nr_phys_segments, queue_max_segments(q));
  		return -EIO;
  	}
  
  	return 0;
  }
  
  /**
   * blk_insert_cloned_request - Helper for stacking drivers to submit a request
   * @q:  the queue to submit the request
   * @rq: the request being queued
   */
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
	if (blk_cloned_rq_check_limits(q, rq))
		return BLK_STS_IOERR;

	if (rq->rq_disk &&
	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
		return BLK_STS_IOERR;

	if (blk_queue_io_stat(q))
		blk_account_io_start(rq, true);

	/*
	 * Since we have a scheduler attached on the top device,
	 * bypass a potential scheduler on the bottom device for
	 * insert.
	 */
	return blk_mq_request_issue_directly(rq, true);
  }
  EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
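
/*
 * Example (illustrative sketch): a request stacking driver such as
 * request-based dm typically dispatches a prepared clone and requeues on
 * resource exhaustion; error handling is abbreviated:
 *
 *	blk_status_t ret = blk_insert_cloned_request(clone->q, clone);
 *
 *	switch (ret) {
 *	case BLK_STS_OK:
 *		break;
 *	case BLK_STS_RESOURCE:
 *	case BLK_STS_DEV_RESOURCE:
 *		... requeue the original request and retry later ...
 *	default:
 *		... complete the clone with the error ...
 *	}
 */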
/**
 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be a merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into an area which needs to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
   */
  unsigned int blk_rq_err_bytes(const struct request *rq)
  {
  	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
  	unsigned int bytes = 0;
  	struct bio *bio;
	if (!(rq->rq_flags & RQF_MIXED_MERGE))
  		return blk_rq_bytes(rq);
  
  	/*
  	 * Currently the only 'mixing' which can happen is between
	 * different failfast types.  We can safely fail portions
  	 * which have all the failfast bits that the first one has -
  	 * the ones which are at least as eager to fail as the first
  	 * one.
  	 */
  	for (bio = rq->bio; bio; bio = bio->bi_next) {
		if ((bio->bi_opf & ff) != ff)
			break;
		bytes += bio->bi_iter.bi_size;
  	}
  
  	/* this could lead to infinite loop */
  	BUG_ON(blk_rq_bytes(rq) && !bytes);
  	return bytes;
  }
  EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
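
/*
 * Example (illustrative sketch): a driver that wants to fail only the
 * portion of a request up to the next failfast boundary could combine this
 * with blk_update_request():
 *
 *	blk_update_request(rq, BLK_STS_IOERR, blk_rq_err_bytes(rq));
 *
 * Any remaining bytes stay on the request and can then be retried.
 */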

void blk_account_io_completion(struct request *req, unsigned int bytes)
{
	if (blk_do_io_stat(req)) {
		const int sgrp = op_stat_group(req_op(req));
		struct hd_struct *part;

		part_stat_lock();
		part = req->part;
		part_stat_add(part, sectors[sgrp], bytes >> 9);
		part_stat_unlock();
	}
}

void blk_account_io_done(struct request *req, u64 now)
{
	/*
	 * Account IO completion.  flush_rq isn't accounted as a
	 * normal IO on queueing nor completion.  Accounting the
	 * containing request is enough.
	 */
	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
		const int sgrp = op_stat_group(req_op(req));
		struct hd_struct *part;

		part_stat_lock();
		part = req->part;

		update_io_ticks(part, jiffies, true);
		part_stat_inc(part, ios[sgrp]);
		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
		part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
		part_dec_in_flight(req->q, part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}

void blk_account_io_start(struct request *rq, bool new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);

	if (!blk_do_io_stat(rq))
		return;

	part_stat_lock();

	if (!new_io) {
		part = rq->part;
		part_stat_inc(part, merges[rw]);
	} else {
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed,
			 * the request will be accounted on the disk only
			 *
			 * We take a reference on disk->part0 although that
			 * partition will never be deleted, so we can treat
			 * it as any other partition.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_inc_in_flight(rq->q, part, rw);
		rq->part = part;
	}

	update_io_ticks(part, jiffies, false);

	part_stat_unlock();
}
  /*
   * Steal bios from a request and add them to a bio list.
   * The request must not have been partially completed before.
   */
  void blk_steal_bios(struct bio_list *list, struct request *rq)
  {
  	if (rq->bio) {
  		if (list->tail)
  			list->tail->bi_next = rq->bio;
  		else
  			list->head = rq->bio;
  		list->tail = rq->biotail;
  
  		rq->bio = NULL;
  		rq->biotail = NULL;
  	}
  
  	rq->__data_len = 0;
  }
  EXPORT_SYMBOL_GPL(blk_steal_bios);
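
/*
 * Example (illustrative sketch): a multipath driver can steal the bios off
 * a failed request, end the request, and resubmit the bios on another path;
 * the resubmission step is abbreviated:
 *
 *	struct bio_list bios = BIO_EMPTY_LIST;
 *	struct bio *bio;
 *
 *	blk_steal_bios(&bios, rq);
 *	blk_mq_end_request(rq, BLK_STS_OK);
 *	while ((bio = bio_list_pop(&bios)) != NULL)
 *		... resubmit bio on a healthy path ...
 */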

/**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req doesn't have leftover.
 *     If @req has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is only for request stacking drivers
 *     (e.g. request-based dm) so that they can handle partial completion.
 *     Actual device drivers should use blk_mq_end_request instead.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Note:
 *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
 *	blk_rq_bytes() and in blk_update_request().
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
  bool blk_update_request(struct request *req, blk_status_t error,
  		unsigned int nr_bytes)
{
	int total_bytes;

	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);

	if (!req->bio)
		return false;
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
  	    error == BLK_STS_OK)
  		req->q->integrity.profile->complete_fn(req, nr_bytes);
  #endif

	if (unlikely(error && !blk_rq_is_passthrough(req) &&
		     !(req->rq_flags & RQF_QUIET)))
		print_req_error(req, error, __func__);

	blk_account_io_completion(req, nr_bytes);

  	total_bytes = 0;
  	while (req->bio) {
  		struct bio *bio = req->bio;
		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);

		if (bio_bytes == bio->bi_iter.bi_size)
			req->bio = bio->bi_next;

		/* Completion has already been traced */
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
		req_bio_endio(req, bio, bio_bytes, error);

		total_bytes += bio_bytes;
		nr_bytes -= bio_bytes;

		if (!nr_bytes)
			break;
  	}
  
  	/*
  	 * completely done
  	 */
  	if (!req->bio) {
  		/*
  		 * Reset counters so that the request stacking driver
  		 * can find how many bytes remain in the request
  		 * later.
  		 */
		req->__data_len = 0;
		return false;
	}

	req->__data_len -= total_bytes;

	/* update sector only for requests with clear definition of sector */
	if (!blk_rq_is_passthrough(req))
		req->__sector += total_bytes >> 9;

	/* mixed attributes always follow the first bio */
	if (req->rq_flags & RQF_MIXED_MERGE) {
		req->cmd_flags &= ~REQ_FAILFAST_MASK;
		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
	}

  	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
  		/*
  		 * If total number of sectors is less than the first segment
  		 * size, something has gone terribly wrong.
  		 */
  		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
  			blk_dump_rq_flags(req, "request botched");
  			req->__data_len = blk_rq_cur_bytes(req);
  		}

		/* recalculate the number of segments */
		req->nr_phys_segments = blk_recalc_rq_segments(req);
	}

	return true;
}
  EXPORT_SYMBOL_GPL(blk_update_request);
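
/*
 * Example (illustrative sketch): a request stacking driver completing a
 * clone in chunks might drive the original request like this:
 *
 *	if (blk_update_request(rq, error, nr_bytes))
 *		... rq still has leftover bios; keep it around ...
 *	else
 *		... all bios done; finish rq, e.g. via blk_mq_end_request() ...
 */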
  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
  /**
   * rq_flush_dcache_pages - Helper function to flush all pages in a request
   * @rq: the request to be flushed
   *
   * Description:
   *     Flush all pages in @rq.
   */
  void rq_flush_dcache_pages(struct request *rq)
  {
  	struct req_iterator iter;
	struct bio_vec bvec;
  
  	rq_for_each_segment(bvec, rq, iter)
		flush_dcache_page(bvec.bv_page);
  }
  EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
  #endif
  /**
   * blk_lld_busy - Check if underlying low-level drivers of a device are busy
   * @q : the queue of the device being checked
   *
   * Description:
   *    Check if underlying low-level drivers of a device are busy.
   *    If the drivers want to export their busy state, they must set own
   *    exporting function using blk_queue_lld_busy() first.
   *
   *    Basically, this function is used only by request stacking drivers
   *    to stop dispatching requests to underlying devices when underlying
   *    devices are busy.  This behavior helps more I/O merging on the queue
   *    of the request stacking driver and prevents I/O throughput regression
   *    on burst I/O load.
   *
   * Return:
   *    0 - Not busy (The request stacking driver should dispatch request)
   *    1 - Busy (The request stacking driver should stop dispatching request)
   */
  int blk_lld_busy(struct request_queue *q)
  {
	if (queue_is_mq(q) && q->mq_ops->busy)
		return q->mq_ops->busy(q);
  
  	return 0;
  }
  EXPORT_SYMBOL_GPL(blk_lld_busy);
  /**
   * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
   * @rq: the clone request to be cleaned up
   *
   * Description:
   *     Free all bios in @rq for a cloned request.
   */
  void blk_rq_unprep_clone(struct request *rq)
  {
  	struct bio *bio;
  
  	while ((bio = rq->bio) != NULL) {
  		rq->bio = bio->bi_next;
  
  		bio_put(bio);
  	}
  }
  EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
  
  /*
   * Copy attributes of the original request to the clone request.
   * The actual data parts (e.g. ->cmd, ->sense) are not copied.
   */
  static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
  	dst->__sector = blk_rq_pos(src);
  	dst->__data_len = blk_rq_bytes(src);
  	if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
  		dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
  		dst->special_vec = src->special_vec;
  	}
  	dst->nr_phys_segments = src->nr_phys_segments;
  	dst->ioprio = src->ioprio;
  	dst->extra_len = src->extra_len;
  }
  
  /**
   * blk_rq_prep_clone - Helper function to setup clone request
   * @rq: the request to be setup
   * @rq_src: original request to be cloned
   * @bs: bio_set that bios for clone are allocated from
   * @gfp_mask: memory allocation mask for bio
   * @bio_ctr: setup function to be called for each clone bio.
   *           Returns %0 for success, non %0 for failure.
   * @data: private data to be passed to @bio_ctr
   *
   * Description:
   *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
   *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
   *     are not copied, and copying such parts is the caller's responsibility.
   *     Also, pages which the original bios are pointing to are not copied
   *     and the cloned bios just point same pages.
   *     So cloned bios must be completed before original bios, which means
   *     the caller must complete @rq before @rq_src.
   */
  int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
  		      struct bio_set *bs, gfp_t gfp_mask,
  		      int (*bio_ctr)(struct bio *, struct bio *, void *),
  		      void *data)
  {
  	struct bio *bio, *bio_src;
  
  	if (!bs)
		bs = &fs_bio_set;
  
  	__rq_for_each_bio(bio_src, rq_src) {
  		bio = bio_clone_fast(bio_src, gfp_mask, bs);
  		if (!bio)
  			goto free_and_out;
  
  		if (bio_ctr && bio_ctr(bio, bio_src, data))
  			goto free_and_out;
  
  		if (rq->bio) {
  			rq->biotail->bi_next = bio;
  			rq->biotail = bio;
  		} else
  			rq->bio = rq->biotail = bio;
  	}
  
  	__blk_rq_prep_clone(rq, rq_src);
  
  	return 0;
  
  free_and_out:
  	if (bio)
  		bio_put(bio);
  	blk_rq_unprep_clone(rq);
  
  	return -ENOMEM;
b0fd271d5   Kiyoshi Ueda   block: add reques...
1583
1584
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
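
/*
 * Example (illustrative sketch): a request stacking driver prepares a clone
 * from its own bio_set before dispatching it to the lower queue; "md->bs",
 * "tio" and my_bio_ctr() stand for hypothetical driver-private state and a
 * per-bio setup callback:
 *
 *	if (blk_rq_prep_clone(clone, rq, &md->bs, GFP_ATOMIC,
 *			      my_bio_ctr, tio))
 *		... -ENOMEM: free the clone and requeue the original ...
 *
 * Remember that the cloned bios share pages with the original request, so
 * the clone must complete first.
 */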
  int kblockd_schedule_work(struct work_struct *work)
  {
  	return queue_work(kblockd_workqueue, work);
  }
  EXPORT_SYMBOL(kblockd_schedule_work);
  int kblockd_schedule_work_on(int cpu, struct work_struct *work)
  {
  	return queue_work_on(cpu, kblockd_workqueue, work);
  }
  EXPORT_SYMBOL(kblockd_schedule_work_on);
  int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
  				unsigned long delay)
  {
  	return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
  }
  EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
  /**
   * blk_start_plug - initialize blk_plug and track it inside the task_struct
   * @plug:	The &struct blk_plug that needs to be initialized
   *
   * Description:
   *   blk_start_plug() indicates to the block layer an intent by the caller
   *   to submit multiple I/O requests in a batch.  The block layer may use
   *   this hint to defer submitting I/Os from the caller until blk_finish_plug()
   *   is called.  However, the block layer may choose to submit requests
   *   before a call to blk_finish_plug() if the number of queued I/Os
   *   exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than
   *   %BLK_PLUG_FLUSH_SIZE.  The queued I/Os may also be submitted early if
   *   the task schedules (see below).
   *
   *   Tracking blk_plug inside the task_struct will help with auto-flushing the
   *   pending I/O should the task end up blocking between blk_start_plug() and
   *   blk_finish_plug(). This is important from a performance perspective, but
   *   also ensures that we don't deadlock. For instance, if the task is blocking
   *   for a memory allocation, memory reclaim could end up wanting to free a
   *   page belonging to that request that is currently residing in our private
   *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
   *   this kind of deadlock.
   */
  void blk_start_plug(struct blk_plug *plug)
  {
  	struct task_struct *tsk = current;
  	/*
  	 * If this is a nested plug, don't actually assign it.
  	 */
  	if (tsk->plug)
  		return;

	INIT_LIST_HEAD(&plug->mq_list);
	INIT_LIST_HEAD(&plug->cb_list);
	plug->rq_count = 0;
	plug->multiple_queues = false;

	/*
	 * Store ordering should not be needed here, since a potential
	 * preempt will imply a full memory barrier
	 */
	tsk->plug = plug;
  }
  EXPORT_SYMBOL(blk_start_plug);

static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
	LIST_HEAD(callbacks);

	while (!list_empty(&plug->cb_list)) {
		list_splice_init(&plug->cb_list, &callbacks);

		while (!list_empty(&callbacks)) {
			struct blk_plug_cb *cb = list_first_entry(&callbacks,
							  struct blk_plug_cb,
							  list);
			list_del(&cb->list);
			cb->callback(cb, from_schedule);
		}
  	}
  }
  struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
  				      int size)
  {
  	struct blk_plug *plug = current->plug;
  	struct blk_plug_cb *cb;
  
  	if (!plug)
  		return NULL;
  
  	list_for_each_entry(cb, &plug->cb_list, list)
  		if (cb->callback == unplug && cb->data == data)
  			return cb;
  
  	/* Not currently on the callback list */
  	BUG_ON(size < sizeof(*cb));
  	cb = kzalloc(size, GFP_ATOMIC);
  	if (cb) {
  		cb->data = data;
  		cb->callback = unplug;
  		list_add(&cb->list, &plug->cb_list);
  	}
  	return cb;
  }
  EXPORT_SYMBOL(blk_check_plugged);
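
/*
 * Example (illustrative sketch): a driver can batch work while a plug is
 * active by registering an unplug callback; my_unplug() is a hypothetical
 * callback that flushes driver-private state:
 *
 *	static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
 *	{
 *		... flush the I/O batched under this plug ...
 *	}
 *
 *	if (blk_check_plugged(my_unplug, dev, sizeof(struct blk_plug_cb)))
 *		... plug active: defer submission until my_unplug() runs ...
 *	else
 *		... no plug: submit immediately ...
 */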

void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	flush_plug_callbacks(plug, from_schedule);
  
  	if (!list_empty(&plug->mq_list))
  		blk_mq_flush_plug_list(plug, from_schedule);
  }

  /**
   * blk_finish_plug - mark the end of a batch of submitted I/O
   * @plug:	The &struct blk_plug passed to blk_start_plug()
   *
   * Description:
   * Indicate that a batch of I/O submissions is complete.  This function
   * must be paired with an initial call to blk_start_plug().  The intent
   * is to allow the block layer to optimize I/O submission.  See the
   * documentation for blk_start_plug() for more information.
   */
  void blk_finish_plug(struct blk_plug *plug)
  {
  	if (plug != current->plug)
  		return;
	blk_flush_plug_list(plug, false);

	current->plug = NULL;
}
  EXPORT_SYMBOL(blk_finish_plug);
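
/*
 * Example (illustrative sketch): the canonical plugging pattern brackets a
 * batch of submissions; submit_my_bios() stands for any sequence of
 * submit_bio() calls:
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	submit_my_bios();
 *	blk_finish_plug(&plug);
 *
 * If the task blocks before blk_finish_plug(), the pending I/O is flushed
 * automatically from the scheduler.
 */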
  int __init blk_dev_init(void)
  {
  	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
  			FIELD_SIZEOF(struct request, cmd_flags));
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
  			FIELD_SIZEOF(struct bio, bi_opf));

  	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
  	kblockd_workqueue = alloc_workqueue("kblockd",
  					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
  	if (!kblockd_workqueue)
  		panic("Failed to create kblockd
  ");
  	blk_requestq_cachep = kmem_cache_create("request_queue",
  			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

  #ifdef CONFIG_DEBUG_FS
  	blk_debugfs_root = debugfs_create_dir("block", NULL);
  #endif
  	return 0;
  }