block/blk-core.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 *	- July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - May 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-pm.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
#include <linux/sched/sysctl.h>
#include <linux/blk-crypto.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-rq-qos.h"

struct dentry *blk_debugfs_root;

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);

DEFINE_IDA(blk_queue_ida);

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_set);

/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	clear_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_clear);

/**
 * blk_queue_flag_test_and_set - atomically test and set a queue flag
 * @flag: flag to be set
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was already set.
 */
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
{
	return test_and_set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
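
/*
 * Illustrative note (added commentary, not from the original file): the
 * three helpers above are thin wrappers around atomic bitops on
 * q->queue_flags, so callers need no queue lock.  A driver advertising a
 * non-rotational device would do something like:
 *
 *	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 */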

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));
	INIT_LIST_HEAD(&rq->queuelist);
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->tag = BLK_MQ_NO_TAG;
	rq->internal_tag = BLK_MQ_NO_TAG;
	rq->start_time_ns = ktime_get_ns();
	rq->part = NULL;
	refcount_set(&rq->ref, 1);
	blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);

#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
	REQ_OP_NAME(READ),
	REQ_OP_NAME(WRITE),
	REQ_OP_NAME(FLUSH),
	REQ_OP_NAME(DISCARD),
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(ZONE_OPEN),
	REQ_OP_NAME(ZONE_CLOSE),
	REQ_OP_NAME(ZONE_FINISH),
	REQ_OP_NAME(ZONE_APPEND),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
	REQ_OP_NAME(SCSI_OUT),
	REQ_OP_NAME(DRV_IN),
	REQ_OP_NAME(DRV_OUT),
};
#undef REQ_OP_NAME

/**
 * blk_op_str - Return the string XXX in the REQ_OP_XXX.
 * @op: REQ_OP_XXX.
 *
 * Description: Centralized block layer function to convert REQ_OP_XXX into
 * string format. Useful for debugging and tracing a bio or request. For an
 * invalid REQ_OP_XXX it returns the string "UNKNOWN".
 */
inline const char *blk_op_str(unsigned int op)
{
	const char *op_str = "UNKNOWN";

	if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op])
		op_str = blk_op_name[op];

	return op_str;
}
EXPORT_SYMBOL_GPL(blk_op_str);

static const struct {
	int		errno;
	const char	*name;
} blk_errors[] = {
	[BLK_STS_OK]		= { 0,		"" },
	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },

	/* zone device specific errors */
	[BLK_STS_ZONE_OPEN_RESOURCE]	= { -ETOOMANYREFS, "open zones exceeded" },
	[BLK_STS_ZONE_ACTIVE_RESOURCE]	= { -EOVERFLOW, "active zones exceeded" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};

blk_status_t errno_to_blk_status(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
		if (blk_errors[i].errno == errno)
			return (__force blk_status_t)i;
	}

	return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(errno_to_blk_status);

int blk_status_to_errno(blk_status_t status)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return -EIO;
	return blk_errors[idx].errno;
}
EXPORT_SYMBOL_GPL(blk_status_to_errno);
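
/*
 * Illustrative note (added commentary): the two conversions above form a
 * lossy round trip.  errno_to_blk_status(-ENOSPC) yields BLK_STS_NOSPC and
 * blk_status_to_errno(BLK_STS_NOSPC) yields -ENOSPC again, but any errno
 * without a dedicated blk_errors[] entry collapses to BLK_STS_IOERR and
 * comes back as -EIO.
 */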

static void print_req_error(struct request *req, blk_status_t status,
		const char *caller)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return;

	printk_ratelimited(KERN_ERR
		"%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x "
		"phys_seg %u prio class %u\n",
		caller, blk_errors[idx].name,
		req->rq_disk ? req->rq_disk->disk_name : "?",
		blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
		req->cmd_flags & ~REQ_OP_MASK,
		req->nr_phys_segments,
		IOPRIO_PRIO_CLASS(req->ioprio));
}
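
/*
 * Added commentary (not from the original file): req_bio_endio() below is
 * called as a request completes, once per bio per completed chunk.  It
 * advances the bio by the completed byte count and only ends the bio once
 * it has been fully advanced, so a request that completes in pieces still
 * ends each of its bios exactly once.
 */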

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
{
	if (error)
		bio->bi_status = error;

	if (unlikely(rq->rq_flags & RQF_QUIET))
		bio_set_flag(bio, BIO_QUIET);

	bio_advance(bio, nbytes);

	if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
		/*
		 * Partial zone append completions cannot be supported as the
		 * BIO fragments may end up not being written sequentially.
		 */
		if (bio->bi_iter.bi_size)
			bio->bi_status = BLK_STS_IOERR;
		else
			bio->bi_iter.bi_sector = rq->__sector;
	}

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
		bio_endio(bio);
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?",
		(unsigned long long) rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
	       rq->bio, rq->biotail, blk_rq_bytes(rq));
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->submit_bio will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with queue lock initialized.
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->timeout_work);
}
EXPORT_SYMBOL(blk_sync_queue);
  
/**
 * blk_set_pm_only - increment pm_only counter
 * @q: request queue pointer
 */
void blk_set_pm_only(struct request_queue *q)
{
	atomic_inc(&q->pm_only);
}
EXPORT_SYMBOL_GPL(blk_set_pm_only);

void blk_clear_pm_only(struct request_queue *q)
{
	int pm_only;

	pm_only = atomic_dec_return(&q->pm_only);
	WARN_ON_ONCE(pm_only < 0);
	if (pm_only == 0)
		wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
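
/*
 * Added commentary (not from the original file): pm_only is a counter
 * rather than a flag so that nested blk_set_pm_only() calls can stack.
 * While it is non-zero, blk_queue_enter() admits only BLK_MQ_REQ_PM
 * requests; the wake_up_all() above lets blocked submitters re-check once
 * the count drops back to zero.
 */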

/**
 * blk_put_queue - decrement the request_queue refcount
 * @q: the request_queue structure to decrement the refcount for
 *
 * Decrements the refcount of the request_queue kobject. When this reaches 0
 * we'll have blk_release_queue() called.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);

void blk_set_queue_dying(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);

	/*
	 * Once the DYING flag is set, new requests must be blocked from
	 * entering the queue, so call blk_freeze_queue_start() to prevent
	 * I/O from crossing blk_queue_enter().
	 */
	blk_freeze_queue_start(q);

	if (queue_is_mq(q))
		blk_mq_wake_waiters(q);

	/* Make blk_queue_enter() reexamine the DYING flag. */
	wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);

/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 *
 * Context: can sleep
 */
void blk_cleanup_queue(struct request_queue *q)
{
	/* cannot be called from atomic context */
	might_sleep();

	WARN_ON_ONCE(blk_queue_registered(q));

	/* mark @q DYING, no new request or merges will be allowed afterwards */
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
	 * prevent blk_mq_run_hw_queues() from accessing the hardware queues
	 * after draining has finished.
	 */
	blk_freeze_queue(q);

	rq_qos_exit(q);

	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);

	/* for synchronous bio-based driver finish in-flight integrity i/o */
	blk_flush_integrity();

	/* @q won't process any more requests, flush async actions */
	del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
	blk_sync_queue(q);

	if (queue_is_mq(q))
		blk_mq_exit_queue(q);

	/*
	 * In theory, the request pool of sched_tags belongs to the request
	 * queue.  However, the current implementation requires the tag_set
	 * for freeing requests, so free the pool now.
	 *
	 * The queue has become frozen, there can't be any in-queue requests,
	 * so it is safe to free the requests now.
	 */
	mutex_lock(&q->sysfs_lock);
	if (q->elevator)
		blk_mq_sched_free_requests(q);
	mutex_unlock(&q->sysfs_lock);

	percpu_ref_exit(&q->q_usage_counter);

	/* @q is and will stay empty, shutdown and put */
	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM
 */
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
	const bool pm = flags & BLK_MQ_REQ_PM;

	while (true) {
		bool success = false;

		rcu_read_lock();
		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
			/*
			 * The code that increments the pm_only counter is
			 * responsible for ensuring that that counter is
			 * globally visible before the queue is unfrozen.
			 */
			if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) ||
			    !blk_queue_pm_only(q)) {
				success = true;
			} else {
				percpu_ref_put(&q->q_usage_counter);
			}
		}
		rcu_read_unlock();

		if (success)
			return 0;

		if (flags & BLK_MQ_REQ_NOWAIT)
			return -EBUSY;

		/*
		 * This is the read-side pairing of the barrier in
		 * blk_freeze_queue_start(): we need to order reading the
		 * __PERCPU_REF_DEAD flag of .q_usage_counter against reading
		 * .mq_freeze_depth or the queue dying flag, otherwise the
		 * following wait may never return if the two reads are
		 * reordered.
		 */
		smp_rmb();

		wait_event(q->mq_freeze_wq,
			   (!q->mq_freeze_depth &&
			    blk_pm_resume_queue(pm, q)) ||
			   blk_queue_dying(q));
		if (blk_queue_dying(q))
			return -ENODEV;
	}
}
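
/*
 * Added commentary (not from the original file): the fast path above is one
 * percpu_ref_tryget_live() under rcu_read_lock(), so entering a live,
 * unfrozen queue touches only a per-cpu counter.  Only a dying, frozen or
 * pm_only queue falls back to the wait_event() slow path.
 */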

static inline int bio_queue_enter(struct bio *bio)
{
	struct request_queue *q = bio->bi_disk->queue;
	bool nowait = bio->bi_opf & REQ_NOWAIT;
	int ret;

	ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
	if (unlikely(ret)) {
		if (nowait && !blk_queue_dying(q))
			bio_wouldblock_error(bio);
		else
			bio_io_error(bio);
	}

	return ret;
}

void blk_queue_exit(struct request_queue *q)
{
	percpu_ref_put(&q->q_usage_counter);
}

static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
	struct request_queue *q =
		container_of(ref, struct request_queue, q_usage_counter);

	wake_up_all(&q->mq_freeze_wq);
}
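
/*
 * Added commentary (not from the original file): the release callback above
 * runs once q_usage_counter drops to zero after percpu_ref_kill(); waking
 * mq_freeze_wq is what lets blk_freeze_queue() finish waiting for the queue
 * to drain.
 */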

static void blk_rq_timed_out_timer(struct timer_list *t)
{
	struct request_queue *q = from_timer(q, t, timeout);

	kblockd_schedule_work(&q->timeout_work);
}

static void blk_timeout_work(struct work_struct *work)
{
}

struct request_queue *blk_alloc_queue(int node_id)
{
	struct request_queue *q;
	int ret;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				GFP_KERNEL | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->last_merge = NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
	if (q->id < 0)
		goto fail_q;

	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto fail_id;

	q->backing_dev_info = bdi_alloc(node_id);
	if (!q->backing_dev_info)
		goto fail_split;

	q->stats = blk_alloc_queue_stats();
	if (!q->stats)
		goto fail_stats;

	q->node = node_id;

	atomic_set(&q->nr_active_requests_shared_sbitmap, 0);

	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
		    laptop_mode_timer_fn, 0);
	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
	INIT_WORK(&q->timeout_work, blk_timeout_work);
	INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
	INIT_LIST_HEAD(&q->blkg_list);
#endif

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->debugfs_mutex);
	mutex_init(&q->sysfs_lock);
	mutex_init(&q->sysfs_dir_lock);
	spin_lock_init(&q->queue_lock);

	init_waitqueue_head(&q->mq_freeze_wq);
	mutex_init(&q->mq_freeze_lock);

	/*
	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
	 * See blk_register_queue() for details.
	 */
	if (percpu_ref_init(&q->q_usage_counter,
				blk_queue_usage_counter_release,
				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
		goto fail_bdi;

	if (blkcg_init_queue(q))
		goto fail_ref;

	blk_queue_dma_alignment(q, 511);
	blk_set_default_limits(&q->limits);
	q->nr_requests = BLKDEV_MAX_RQ;

	return q;

fail_ref:
	percpu_ref_exit(&q->q_usage_counter);
fail_bdi:
	blk_free_queue_stats(q->stats);
fail_stats:
	bdi_put(q->backing_dev_info);
fail_split:
	bioset_exit(&q->bio_split);
fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue);
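
/*
 * Illustrative usage (added commentary, not from the original file): a
 * bio-based driver pairs this allocator with blk_cleanup_queue():
 *
 *	q = blk_alloc_queue(NUMA_NO_NODE);
 *	if (!q)
 *		return -ENOMEM;
 *	...
 *	blk_cleanup_queue(q);
 */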

/**
 * blk_get_queue - increment the request_queue refcount
 * @q: the request_queue structure to increment the refcount for
 *
 * Increment the refcount of the request_queue kobject.
 *
 * Context: Any context.
 */
bool blk_get_queue(struct request_queue *q)
{
	if (likely(!blk_queue_dying(q))) {
		__blk_get_queue(q);
		return true;
	}

	return false;
}
EXPORT_SYMBOL(blk_get_queue);

/**
 * blk_get_request - allocate a request
 * @q: request queue to allocate a request for
 * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
 * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
 */
struct request *blk_get_request(struct request_queue *q, unsigned int op,
				blk_mq_req_flags_t flags)
{
	struct request *req;

	WARN_ON_ONCE(op & REQ_NOWAIT);
	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));

	req = blk_mq_alloc_request(q, op, flags);
	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
		q->mq_ops->initialize_rq_fn(req);

	return req;
}
EXPORT_SYMBOL(blk_get_request);

void blk_put_request(struct request *req)
{
	blk_mq_free_request(req);
}
EXPORT_SYMBOL(blk_put_request);

static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
	char b[BDEVNAME_SIZE];

	pr_info_ratelimited("attempt to access beyond end of device\n"
			    "%s: rw=%d, want=%llu, limit=%llu\n",
			    bio_devname(bio, b), bio->bi_opf,
			    bio_end_sector(bio), maxsector);
}

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
{
	return part->make_it_fail && should_fail(&fail_make_request, bytes);
}

static int __init fail_make_request_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
						NULL, &fail_make_request);

	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

static inline bool should_fail_request(struct hd_struct *part,
					unsigned int bytes)
{
	return false;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */
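
/*
 * Illustrative usage (added commentary, not from the original file): with
 * CONFIG_FAIL_MAKE_REQUEST, bios can be failed artificially, e.g. by booting
 * with fail_make_request=<interval>,<probability>,<space>,<times> (or
 * configuring the same fault attributes under debugfs) and then enabling a
 * device via its make-it-fail sysfs attribute; should_fail_request() then
 * fails bios on that device with the configured probability.
 */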

static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
	const int op = bio_op(bio);

	if (part->policy && op_is_write(op)) {
		char b[BDEVNAME_SIZE];

		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			return false;

		WARN_ONCE(1,
		       "Trying to write to read-only block-device %s (partno %d)\n",
			bio_devname(bio, b), part->partno);
		/* Older lvm-tools actually trigger this */
		return false;
	}

	return false;
}

static noinline int should_fail_bio(struct bio *bio)
{
	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
		return -EIO;
	return 0;
}
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
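
/*
 * Added commentary (not from the original file): the ALLOW_ERROR_INJECTION()
 * tag above additionally lets the kernel's function error injection
 * framework, when enabled, override should_fail_bio()'s return value with an
 * errno, independent of the CONFIG_FAIL_MAKE_REQUEST knobs.
 */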

/*
 * Check whether this bio extends beyond the end of the device or partition.
 * This may well happen - the kernel calls bread() without checking the size of
 * the device, e.g., when mounting a file system.
 */
static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
{
	unsigned int nr_sectors = bio_sectors(bio);

	if (nr_sectors && maxsector &&
	    (nr_sectors > maxsector ||
	     bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
		handle_bad_sector(bio, maxsector);
		return -EIO;
	}
	return 0;
}
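
/*
 * Added commentary (not from the original file): the bounds test above is
 * written as "bi_sector > maxsector - nr_sectors" rather than
 * "bi_sector + nr_sectors > maxsector" so that a huge bi_sector cannot
 * overflow sector_t and slip past the end-of-device check.
 */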
  
/*
 * Remap block n of partition p to block n+start(p) of the disk.
 */
static inline int blk_partition_remap(struct bio *bio)
{
	struct hd_struct *p;
	int ret = -EIO;

	rcu_read_lock();
	p = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (unlikely(!p))
		goto out;
	if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
		goto out;
	if (unlikely(bio_check_ro(bio, p)))
		goto out;

	if (bio_sectors(bio)) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
		trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
				      bio->bi_iter.bi_sector - p->start_sect);
	}
	bio->bi_partno = 0;
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

/*
 * Check write append to a zoned block device.
 */
static inline blk_status_t blk_check_zone_append(struct request_queue *q,
						 struct bio *bio)
{
	sector_t pos = bio->bi_iter.bi_sector;
	int nr_sectors = bio_sectors(bio);

	/* Only applicable to zoned block devices */
	if (!blk_queue_is_zoned(q))
		return BLK_STS_NOTSUPP;

	/* The bio sector must point to the start of a sequential zone */
	if (pos & (blk_queue_zone_sectors(q) - 1) ||
	    !blk_queue_zone_is_seq(q, pos))
		return BLK_STS_IOERR;

	/*
	 * Not allowed to cross zone boundaries. Otherwise, the BIO will be
	 * split and could result in non-contiguous sectors being written in
	 * different zones.
	 */
	if (nr_sectors > q->limits.chunk_sectors)
		return BLK_STS_IOERR;

	/* Make sure the BIO is small enough and will not get split */
	if (nr_sectors > q->limits.max_zone_append_sectors)
		return BLK_STS_IOERR;

	bio->bi_opf |= REQ_NOMERGE;

	return BLK_STS_OK;
}
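
/*
 * Added commentary (not from the original file): the alignment test above
 * relies on blk_queue_zone_sectors() being a power of two, which the block
 * layer enforces for zoned devices, making the bitmask equivalent to
 * pos % zone_sectors == 0.
 */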

static noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
	struct request_queue *q = bio->bi_disk->queue;
	blk_status_t status = BLK_STS_IOERR;
	struct blk_plug *plug;

	might_sleep();

	plug = blk_mq_plug(q, bio);
	if (plug && plug->nowait)
		bio->bi_opf |= REQ_NOWAIT;

	/*
	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
	 * if queue does not support NOWAIT.
	 */
	if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
		goto not_supported;

	if (should_fail_bio(bio))
		goto end_io;

	if (bio->bi_partno) {
		if (unlikely(blk_partition_remap(bio)))
			goto end_io;
	} else {
		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
			goto end_io;
		if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
			goto end_io;
	}

	/*
	 * Filter flush bio's early so that bio based drivers without flush
	 * support don't have to worry about them.
	 */
	if (op_is_flush(bio->bi_opf) &&
	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
		if (!bio_sectors(bio)) {
			status = BLK_STS_OK;
			goto end_io;
		}
	}

	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		bio->bi_opf &= ~REQ_HIPRI;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
		if (!blk_queue_discard(q))
			goto not_supported;
		break;
	case REQ_OP_SECURE_ERASE:
		if (!blk_queue_secure_erase(q))
			goto not_supported;
		break;
	case REQ_OP_WRITE_SAME:
		if (!q->limits.max_write_same_sectors)
			goto not_supported;
		break;
	case REQ_OP_ZONE_APPEND:
		status = blk_check_zone_append(q, bio);
		if (status != BLK_STS_OK)
			goto end_io;
		break;
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		if (!blk_queue_is_zoned(q))
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET_ALL:
		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
			goto not_supported;
		break;
	case REQ_OP_WRITE_ZEROES:
		if (!q->limits.max_write_zeroes_sectors)
			goto not_supported;
		break;
	default:
		break;
	}

	/*
	 * Various block parts want %current->io_context, so allocate it up
	 * front rather than dealing with lots of pain to allocate it only
	 * where needed. This may fail and the block layer knows how to live
	 * with it.
	 */
	if (unlikely(!current->io_context))
		create_task_io_context(current, GFP_ATOMIC, q->node);

	if (blk_throtl_bio(bio)) {
		blkcg_bio_issue_init(bio);
		return false;
	}

	blk_cgroup_bio_start(bio);
	blkcg_bio_issue_init(bio);

	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
		trace_block_bio_queue(q, bio);
		/* Now that enqueuing has been traced, we need to trace
		 * completion as well.
		 */
		bio_set_flag(bio, BIO_TRACE_COMPLETION);
	}
	return true;

not_supported:
	status = BLK_STS_NOTSUPP;
end_io:
	bio->bi_status = status;
	bio_endio(bio);
	return false;
}

static blk_qc_t __submit_bio(struct bio *bio)
{
	struct gendisk *disk = bio->bi_disk;
	blk_qc_t ret = BLK_QC_T_NONE;

	if (blk_crypto_bio_prep(&bio)) {
		if (!disk->fops->submit_bio)
			return blk_mq_submit_bio(bio);
		ret = disk->fops->submit_bio(bio);
	}
	blk_queue_exit(disk->queue);
	return ret;
}
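
/*
 * Added commentary (not from the original file): the blk_queue_exit() above
 * drops the reference taken by bio_queue_enter() in the callers of
 * __submit_bio().  blk_mq_submit_bio() consumes that reference itself,
 * which is why the !->submit_bio branch returns without calling
 * blk_queue_exit().
 */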

/*
 * The loop in this function may be a bit non-obvious, and so deserves some
 * explanation:
 *
 *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
 *    that), so we have a list with a single bio.
 *  - We pretend that we have just taken it off a longer list, so we assign
 *    bio_list to a pointer to the bio_list_on_stack, thus initialising the
 *    bio_list of new bios to be added.  ->submit_bio() may indeed add some more
 *    bios through a recursive call to submit_bio_noacct.  If it did, we find a
 *    non-NULL value in bio_list and re-enter the loop from the top.
 *  - In this case we really did just take the bio off the top of the list (no
 *    pretending) and so remove it from bio_list, and call into ->submit_bio()
 *    again.
 *
 * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
 * bio_list_on_stack[1] contains bios that were submitted before the current
 *	->submit_bio, but that haven't been processed yet.
 */
static blk_qc_t __submit_bio_noacct(struct bio *bio)
{
	struct bio_list bio_list_on_stack[2];
	blk_qc_t ret = BLK_QC_T_NONE;

	BUG_ON(bio->bi_next);

	bio_list_init(&bio_list_on_stack[0]);
	current->bio_list = bio_list_on_stack;

	do {
		struct request_queue *q = bio->bi_disk->queue;
		struct bio_list lower, same;

		if (unlikely(bio_queue_enter(bio) != 0))
			continue;

		/*
		 * Create a fresh bio_list for all subordinate requests.
		 */
		bio_list_on_stack[1] = bio_list_on_stack[0];
		bio_list_init(&bio_list_on_stack[0]);

		ret = __submit_bio(bio);

		/*
		 * Sort new bios into those for a lower level and those for the
		 * same level.
		 */
		bio_list_init(&lower);
		bio_list_init(&same);
		while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
			if (q == bio->bi_disk->queue)
				bio_list_add(&same, bio);
			else
				bio_list_add(&lower, bio);

		/*
		 * Now assemble so we handle the lowest level first.
		 */
		bio_list_merge(&bio_list_on_stack[0], &lower);
		bio_list_merge(&bio_list_on_stack[0], &same);
		bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));

	current->bio_list = NULL;
	return ret;
}

static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
{
	struct bio_list bio_list[2] = { };
	blk_qc_t ret = BLK_QC_T_NONE;

	current->bio_list = bio_list;

	do {
		struct gendisk *disk = bio->bi_disk;

		if (unlikely(bio_queue_enter(bio) != 0))
			continue;

		if (!blk_crypto_bio_prep(&bio)) {
			blk_queue_exit(disk->queue);
			ret = BLK_QC_T_NONE;
			continue;
		}

		ret = blk_mq_submit_bio(bio);
	} while ((bio = bio_list_pop(&bio_list[0])));

	current->bio_list = NULL;
	return ret;
}

/**
 * submit_bio_noacct - re-submit a bio to the block device layer for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * This is a version of submit_bio() that shall only be used for I/O that is
 * resubmitted to lower level drivers by stacking block drivers.  All file
 * systems and other upper level users of the block layer should use
 * submit_bio() instead.
 */
blk_qc_t submit_bio_noacct(struct bio *bio)
{
	if (!submit_bio_checks(bio))
		return BLK_QC_T_NONE;

	/*
	 * We only want one ->submit_bio to be active at a time, else stack
	 * usage with stacked devices could be a problem.  Use current->bio_list
	 * to collect a list of requests submitted by a ->submit_bio method
	 * while it is active, and then process them after it returned.
	 */
	if (current->bio_list) {
		bio_list_add(&current->bio_list[0], bio);
		return BLK_QC_T_NONE;
	}

	if (!bio->bi_disk->fops->submit_bio)
		return __submit_bio_noacct_mq(bio);
	return __submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio_noacct);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1013
1014
  
  /**
710027a48   Randy Dunlap   Add some block/ s...
1015
   * submit_bio - submit a bio to the block device layer for I/O
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1016
1017
   * @bio: The &struct bio which describes the I/O
   *
3fdd40861   Christoph Hellwig   block: improve th...
1018
1019
1020
   * submit_bio() is used to submit I/O requests to block devices.  It is passed a
   * fully set up &struct bio that describes the I/O that needs to be done.  The
   * bio will be sent to the device described by the bi_disk and bi_partno fields.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021
   *
3fdd40861   Christoph Hellwig   block: improve th...
1022
1023
1024
1025
   * The success/failure status of the request, along with notification of
   * completion, is delivered asynchronously through the ->bi_end_io() callback
   * in @bio.  The bio must NOT be touched by the caller until ->bi_end_io() has
   * been called.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1026
   */
4e49ea4a3   Mike Christie   block/fs/drivers:...
1027
  blk_qc_t submit_bio(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1028
  {
d3f77dfdc   Tejun Heo   blkcg: implement ...
1029
1030
  	if (blkcg_punt_bio_submit(bio))
  		return BLK_QC_T_NONE;
bf2de6f5a   Jens Axboe   block: Initial su...
1031
1032
1033
1034
  	/*
  	 * If it's a regular read/write or a barrier with data attached,
  	 * go through the normal accounting stuff before submission.
  	 */
e2a60da74   Martin K. Petersen   block: Clean up s...
1035
  	if (bio_has_data(bio)) {
4363ac7c1   Martin K. Petersen   block: Implement ...
1036
  		unsigned int count;
95fe6c1a2   Mike Christie   block, fs, mm, dr...
1037
  		if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
7c5a0dcf5   Jiufei Xue   block: fix the co...
1038
  			count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
4363ac7c1   Martin K. Petersen   block: Implement ...
1039
1040
  		else
  			count = bio_sectors(bio);
a8ebb056a   Mike Christie   block, drivers, c...
1041
  		if (op_is_write(bio_op(bio))) {
bf2de6f5a   Jens Axboe   block: Initial su...
1042
1043
  			count_vm_events(PGPGOUT, count);
  		} else {
4f024f379   Kent Overstreet   block: Abstract o...
1044
  			task_io_account_read(bio->bi_iter.bi_size);
bf2de6f5a   Jens Axboe   block: Initial su...
1045
1046
1047
1048
1049
  			count_vm_events(PGPGIN, count);
  		}
  
  		if (unlikely(block_dump)) {
  			char b[BDEVNAME_SIZE];
8dcbdc742   San Mehat   block: block_dump...
1050
1051
  			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)
  ",
ba25f9dcc   Pavel Emelyanov   Use helpers to ob...
1052
  			current->comm, task_pid_nr(current),
a8ebb056a   Mike Christie   block, drivers, c...
1053
  				op_is_write(bio_op(bio)) ? "WRITE" : "READ",
4f024f379   Kent Overstreet   block: Abstract o...
1054
  				(unsigned long long)bio->bi_iter.bi_sector,
74d46992e   Christoph Hellwig   block: replace bi...
1055
  				bio_devname(bio, b), count);
bf2de6f5a   Jens Axboe   block: Initial su...
1056
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1057
  	}
b8e24a930   Johannes Weiner   block: annotate r...
1058
  	/*
760f83ea6   Christoph Hellwig   block: cleanup th...
1059
1060
1061
1062
  	 * If we're reading data that is part of the userspace workingset, count
  	 * submission time as memory stall.  When the device is congested, or
  	 * the submitting cgroup is IO-throttled, submission can be a significant
  	 * part of overall IO time.
b8e24a930   Johannes Weiner   block: annotate r...
1063
  	 */
760f83ea6   Christoph Hellwig   block: cleanup th...
1064
1065
1066
1067
  	if (unlikely(bio_op(bio) == REQ_OP_READ &&
  	    bio_flagged(bio, BIO_WORKINGSET))) {
  		unsigned long pflags;
  		blk_qc_t ret;
b8e24a930   Johannes Weiner   block: annotate r...
1068

760f83ea6   Christoph Hellwig   block: cleanup th...
1069
  		psi_memstall_enter(&pflags);
ed00aabd5   Christoph Hellwig   block: rename gen...
1070
  		ret = submit_bio_noacct(bio);
b8e24a930   Johannes Weiner   block: annotate r...
1071
  		psi_memstall_leave(&pflags);
760f83ea6   Christoph Hellwig   block: cleanup th...
1072
1073
  		return ret;
  	}
ed00aabd5   Christoph Hellwig   block: rename gen...
1074
  	return submit_bio_noacct(bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1075
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1076
  EXPORT_SYMBOL(submit_bio);
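  
  /*
   * Illustrative sketch (not part of blk-core.c): building and submitting
   * a simple one-page read.  my_end_io() and my_read_page() are
   * hypothetical; bio_alloc(), bio_set_dev(), bio_add_page() and
   * submit_bio() are the real interfaces being shown.
   */
  static void my_end_io(struct bio *bio)
  {
  	/* runs asynchronously; only now may the submitter touch the bio */
  	/* ... propagate bio->bi_status to the upper-level operation ... */
  	bio_put(bio);
  }
  
  static void my_read_page(struct block_device *bdev, sector_t sector,
  			 struct page *page)
  {
  	struct bio *bio = bio_alloc(GFP_NOIO, 1);
  
  	bio_set_dev(bio, bdev);		/* fills in bi_disk/bi_partno */
  	bio->bi_opf = REQ_OP_READ;
  	bio->bi_iter.bi_sector = sector;
  	bio_add_page(bio, page, PAGE_SIZE, 0);
  	bio->bi_end_io = my_end_io;
  	submit_bio(bio);		/* completion arrives via ->bi_end_io */
  }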
1052b8ac5   Jens Axboe   blk-mq: when poll...
1077
  /**
bf4e6b4e7   Hannes Reinecke   block: Always che...
1078
   * blk_cloned_rq_check_limits - Helper function to check a cloned request
0d7203182   Guoqing Jiang   block: fix commen...
1079
   *                              for the new queue limits
82124d603   Kiyoshi Ueda   block: add reques...
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
   * @q:  the queue
   * @rq: the request being checked
   *
   * Description:
   *    @rq may have been made based on weaker limitations of upper-level queues
   *    in request stacking drivers, and it may violate the limitation of @q.
   *    Since the block layer and the underlying device driver trust @rq
   *    after it is inserted into @q, it should be checked against @q before
   *    the insertion using this generic function.
   *
82124d603   Kiyoshi Ueda   block: add reques...
1090
   *    Request stacking drivers like request-based dm may change the queue
bf4e6b4e7   Hannes Reinecke   block: Always che...
1091
1092
   *    limits when retrying requests on other queues. Those requests need
   *    to be checked against the new queue limits again during dispatch.
82124d603   Kiyoshi Ueda   block: add reques...
1093
   */
143d2600f   Ritika Srivastava   block: Return blk...
1094
  static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q,
bf4e6b4e7   Hannes Reinecke   block: Always che...
1095
  				      struct request *rq)
82124d603   Kiyoshi Ueda   block: add reques...
1096
  {
8327cce5f   Ritika Srivastava   block: better dea...
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
  	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
  
  	if (blk_rq_sectors(rq) > max_sectors) {
  		/*
  		 * SCSI device does not have a good way to return if
  		 * Write Same/Zero is actually supported. If a device rejects
  		 * a non-read/write command (discard, write same, etc.), the
  		 * low-level device driver will set the relevant queue limit to
  		 * 0 to prevent blk-lib from issuing more of the offending
  		 * operations. Commands queued prior to the queue limit being
  		 * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
  		 * errors being propagated to upper layers.
  		 */
  		if (max_sectors == 0)
  			return BLK_STS_NOTSUPP;
61939b12d   John Pittman   block: print offe...
1112
1113
  		printk(KERN_ERR "%s: over max size limit. (%u > %u)
  ",
8327cce5f   Ritika Srivastava   block: better dea...
1114
  			__func__, blk_rq_sectors(rq), max_sectors);
143d2600f   Ritika Srivastava   block: Return blk...
1115
  		return BLK_STS_IOERR;
82124d603   Kiyoshi Ueda   block: add reques...
1116
1117
1118
1119
1120
1121
1122
1123
  	}
  
  	/*
  	 * The queue's settings related to segment counting, like q->bounce_pfn,
  	 * may differ from those of other stacking queues.
  	 * Recalculate the segment count to check the request correctly against
  	 * this queue's limits.
  	 */
e9cd19c0c   Christoph Hellwig   block: simplify b...
1124
  	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
8a78362c4   Martin K. Petersen   block: Consolidat...
1125
  	if (rq->nr_phys_segments > queue_max_segments(q)) {
61939b12d   John Pittman   block: print offe...
1126
1127
1128
  		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)
  ",
  			__func__, rq->nr_phys_segments, queue_max_segments(q));
143d2600f   Ritika Srivastava   block: Return blk...
1129
  		return BLK_STS_IOERR;
82124d603   Kiyoshi Ueda   block: add reques...
1130
  	}
143d2600f   Ritika Srivastava   block: Return blk...
1131
  	return BLK_STS_OK;
82124d603   Kiyoshi Ueda   block: add reques...
1132
  }
82124d603   Kiyoshi Ueda   block: add reques...
1133
1134
1135
1136
1137
1138
  
  /**
   * blk_insert_cloned_request - Helper for stacking drivers to submit a request
   * @q:  the queue to submit the request
   * @rq: the request being queued
   */
2a842acab   Christoph Hellwig   block: introduce ...
1139
  blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
82124d603   Kiyoshi Ueda   block: add reques...
1140
  {
8327cce5f   Ritika Srivastava   block: better dea...
1141
1142
1143
1144
1145
  	blk_status_t ret;
  
  	ret = blk_cloned_rq_check_limits(q, rq);
  	if (ret != BLK_STS_OK)
  		return ret;
82124d603   Kiyoshi Ueda   block: add reques...
1146

b2c9cd379   Akinobu Mita   fail_make_request...
1147
1148
  	if (rq->rq_disk &&
  	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2a842acab   Christoph Hellwig   block: introduce ...
1149
  		return BLK_STS_IOERR;
82124d603   Kiyoshi Ueda   block: add reques...
1150

a892c8d52   Satya Tangirala   block: Inline enc...
1151
1152
  	if (blk_crypto_insert_cloned_request(rq))
  		return BLK_STS_IOERR;
a1ce35fa4   Jens Axboe   block: remove dea...
1153
  	if (blk_queue_io_stat(q))
b5af37ab3   Konstantin Khlebnikov   block: add a blk_...
1154
  		blk_account_io_start(rq);
82124d603   Kiyoshi Ueda   block: add reques...
1155
1156
  
  	/*
a1ce35fa4   Jens Axboe   block: remove dea...
1157
1158
1159
  	 * Since we have a scheduler attached to the top device,
  	 * bypass a potential scheduler on the bottom device for
  	 * insert.
82124d603   Kiyoshi Ueda   block: add reques...
1160
  	 */
fd9c40f64   Bart Van Assche   block: Revert v5....
1161
  	return blk_mq_request_issue_directly(rq, true);
82124d603   Kiyoshi Ueda   block: add reques...
1162
1163
  }
  EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
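  
  /*
   * Illustrative sketch (not part of blk-core.c): how a request stacking
   * driver in the style of request-based dm might dispatch a prepared
   * clone and react to the return value.  my_requeue_clone() and
   * my_fail_original() are hypothetical helpers.
   */
  static void my_dispatch_clone(struct request *clone)
  {
  	blk_status_t ret = blk_insert_cloned_request(clone->q, clone);
  
  	switch (ret) {
  	case BLK_STS_OK:
  		break;
  	case BLK_STS_RESOURCE:
  	case BLK_STS_DEV_RESOURCE:
  		/* the lower queue is out of resources: try again later */
  		my_requeue_clone(clone);
  		break;
  	default:
  		/* includes BLK_STS_IOERR/BLK_STS_NOTSUPP from the limit checks */
  		my_fail_original(clone, ret);
  		break;
  	}
  }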
80a761fd3   Tejun Heo   block: implement ...
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
  /**
   * blk_rq_err_bytes - determine number of bytes till the next failure boundary
   * @rq: request to examine
   *
   * Description:
   *     A request could be a merge of IOs which require different failure
   *     handling.  This function determines the number of bytes which
   *     can be failed from the beginning of the request without
   *     crossing into an area which needs to be retried further.
   *
   * Return:
   *     The number of bytes to fail.
80a761fd3   Tejun Heo   block: implement ...
1176
1177
1178
1179
1180
1181
   */
  unsigned int blk_rq_err_bytes(const struct request *rq)
  {
  	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
  	unsigned int bytes = 0;
  	struct bio *bio;
e80640213   Christoph Hellwig   block: split out ...
1182
  	if (!(rq->rq_flags & RQF_MIXED_MERGE))
80a761fd3   Tejun Heo   block: implement ...
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
  		return blk_rq_bytes(rq);
  
  	/*
  	 * Currently the only 'mixing' which can happen is between
  	 * different fastfail types.  We can safely fail portions
  	 * which have all the failfast bits that the first one has -
  	 * the ones which are at least as eager to fail as the first
  	 * one.
  	 */
  	for (bio = rq->bio; bio; bio = bio->bi_next) {
1eff9d322   Jens Axboe   block: rename bio...
1193
  		if ((bio->bi_opf & ff) != ff)
80a761fd3   Tejun Heo   block: implement ...
1194
  			break;
4f024f379   Kent Overstreet   block: Abstract o...
1195
  		bytes += bio->bi_iter.bi_size;
80a761fd3   Tejun Heo   block: implement ...
1196
1197
1198
1199
1200
1201
1202
  	}
  
  	/* this could lead to an infinite loop */
  	BUG_ON(blk_rq_bytes(rq) && !bytes);
  	return bytes;
  }
  EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
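  
  /*
   * Illustrative sketch (not part of blk-core.c): one way a driver could
   * use blk_rq_err_bytes() to fail only the leading, equally-failfast part
   * of a mixed-merge request and retry the rest.  The requeue policy is
   * hypothetical; blk_update_request() and blk_mq_requeue_request() are
   * the real interfaces.
   */
  static void my_fail_failfast_head(struct request *rq)
  {
  	unsigned int bad = blk_rq_err_bytes(rq);
  
  	if (blk_update_request(rq, BLK_STS_IOERR, bad))
  		/* bytes remain: requeue the retryable tail of the request */
  		blk_mq_requeue_request(rq, true);
  	else
  		__blk_mq_end_request(rq, BLK_STS_IOERR);
  }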
9123bf6f2   Christoph Hellwig   block: move updat...
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
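  /*
   * Lockless io_ticks accounting: sample the per-partition ->stamp and, if
   * a new jiffy has started, try to advance it with cmpxchg().  The winner
   * of the race credits io_ticks either with the elapsed ticks (at
   * completion, @end) or with a single tick (at submission), then the same
   * update is repeated for the whole-device statistics in part0.
   */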
  static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
  {
  	unsigned long stamp;
  again:
  	stamp = READ_ONCE(part->stamp);
  	if (unlikely(stamp != now)) {
  		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
  			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
  	}
  	if (part->partno) {
  		part = &part_to_disk(part)->part0;
  		goto again;
  	}
  }
f1394b798   Christoph Hellwig   block: mark blk_a...
1217
  static void blk_account_io_completion(struct request *req, unsigned int bytes)
bc58ba946   Jens Axboe   block: add sysfs ...
1218
  {
ecb6186cf   Logan Gunthorpe   block: fix NULL p...
1219
  	if (req->part && blk_do_io_stat(req)) {
ddcf35d39   Michael Callahan   block: Add and us...
1220
  		const int sgrp = op_stat_group(req_op(req));
bc58ba946   Jens Axboe   block: add sysfs ...
1221
  		struct hd_struct *part;
bc58ba946   Jens Axboe   block: add sysfs ...
1222

112f158f6   Mike Snitzer   block: stop passi...
1223
  		part_stat_lock();
09e099d4b   Jerome Marchand   block: fix accoun...
1224
  		part = req->part;
112f158f6   Mike Snitzer   block: stop passi...
1225
  		part_stat_add(part, sectors[sgrp], bytes >> 9);
bc58ba946   Jens Axboe   block: add sysfs ...
1226
1227
1228
  		part_stat_unlock();
  	}
  }
522a77756   Omar Sandoval   block: consolidat...
1229
  void blk_account_io_done(struct request *req, u64 now)
bc58ba946   Jens Axboe   block: add sysfs ...
1230
  {
bc58ba946   Jens Axboe   block: add sysfs ...
1231
  	/*
dd4c133f3   Tejun Heo   block: rename bar...
1232
1233
1234
  	 * Account IO completion.  flush_rq isn't accounted as a
  	 * normal IO on either queueing or completion.  Accounting the
  	 * containing request is enough.
bc58ba946   Jens Axboe   block: add sysfs ...
1235
  	 */
ecb6186cf   Logan Gunthorpe   block: fix NULL p...
1236
1237
  	if (req->part && blk_do_io_stat(req) &&
  	    !(req->rq_flags & RQF_FLUSH_SEQ)) {
ddcf35d39   Michael Callahan   block: Add and us...
1238
  		const int sgrp = op_stat_group(req_op(req));
bc58ba946   Jens Axboe   block: add sysfs ...
1239
  		struct hd_struct *part;
bc58ba946   Jens Axboe   block: add sysfs ...
1240

112f158f6   Mike Snitzer   block: stop passi...
1241
  		part_stat_lock();
09e099d4b   Jerome Marchand   block: fix accoun...
1242
  		part = req->part;
bc58ba946   Jens Axboe   block: add sysfs ...
1243

2b8bd4236   Konstantin Khlebnikov   block/diskstats: ...
1244
  		update_io_ticks(part, jiffies, true);
112f158f6   Mike Snitzer   block: stop passi...
1245
1246
  		part_stat_inc(part, ios[sgrp]);
  		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
524f9ffd6   Christoph Hellwig   block: reduce par...
1247
  		part_stat_unlock();
bc58ba946   Jens Axboe   block: add sysfs ...
1248

6c23a9681   Jens Axboe   block: add intern...
1249
  		hd_struct_put(part);
bc58ba946   Jens Axboe   block: add sysfs ...
1250
1251
  	}
  }
b5af37ab3   Konstantin Khlebnikov   block: add a blk_...
1252
  void blk_account_io_start(struct request *rq)
320ae51fe   Jens Axboe   blk-mq: new multi...
1253
  {
320ae51fe   Jens Axboe   blk-mq: new multi...
1254
1255
  	if (!blk_do_io_stat(rq))
  		return;
b5af37ab3   Konstantin Khlebnikov   block: add a blk_...
1256
  	rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
524f9ffd6   Christoph Hellwig   block: reduce par...
1257

112f158f6   Mike Snitzer   block: stop passi...
1258
  	part_stat_lock();
76268f3ac   Christoph Hellwig   block: don't call...
1259
  	update_io_ticks(rq->part, jiffies, false);
320ae51fe   Jens Axboe   blk-mq: new multi...
1260
1261
  	part_stat_unlock();
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1262

7b26410b0   Song Liu   block: introduce ...
1263
1264
  static unsigned long __part_start_io_acct(struct hd_struct *part,
  					  unsigned int sectors, unsigned int op)
956d510ee   Christoph Hellwig   block: add disk/b...
1265
  {
956d510ee   Christoph Hellwig   block: add disk/b...
1266
1267
1268
1269
1270
1271
1272
1273
1274
  	const int sgrp = op_stat_group(op);
  	unsigned long now = READ_ONCE(jiffies);
  
  	part_stat_lock();
  	update_io_ticks(part, now, false);
  	part_stat_inc(part, ios[sgrp]);
  	part_stat_add(part, sectors[sgrp], sectors);
  	part_stat_local_inc(part, in_flight[op_is_write(op)]);
  	part_stat_unlock();
320ae51fe   Jens Axboe   blk-mq: new multi...
1275

956d510ee   Christoph Hellwig   block: add disk/b...
1276
1277
  	return now;
  }
7b26410b0   Song Liu   block: introduce ...
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
  
  unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part,
  				 struct bio *bio)
  {
  	*part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector);
  
  	return __part_start_io_acct(*part, bio_sectors(bio), bio_op(bio));
  }
  EXPORT_SYMBOL_GPL(part_start_io_acct);
  
  unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
  				 unsigned int op)
  {
  	return __part_start_io_acct(&disk->part0, sectors, op);
  }
956d510ee   Christoph Hellwig   block: add disk/b...
1293
  EXPORT_SYMBOL(disk_start_io_acct);
7b26410b0   Song Liu   block: introduce ...
1294
1295
  static void __part_end_io_acct(struct hd_struct *part, unsigned int op,
  			       unsigned long start_time)
956d510ee   Christoph Hellwig   block: add disk/b...
1296
  {
956d510ee   Christoph Hellwig   block: add disk/b...
1297
1298
1299
  	const int sgrp = op_stat_group(op);
  	unsigned long now = READ_ONCE(jiffies);
  	unsigned long duration = now - start_time;
5b18b5a73   Mikulas Patocka   block: delete par...
1300

956d510ee   Christoph Hellwig   block: add disk/b...
1301
1302
1303
1304
  	part_stat_lock();
  	update_io_ticks(part, now, true);
  	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
  	part_stat_local_dec(part, in_flight[op_is_write(op)]);
320ae51fe   Jens Axboe   blk-mq: new multi...
1305
1306
  	part_stat_unlock();
  }
7b26410b0   Song Liu   block: introduce ...
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
  
  void part_end_io_acct(struct hd_struct *part, struct bio *bio,
  		      unsigned long start_time)
  {
  	__part_end_io_acct(part, bio_op(bio), start_time);
  	hd_struct_put(part);
  }
  EXPORT_SYMBOL_GPL(part_end_io_acct);
  
  void disk_end_io_acct(struct gendisk *disk, unsigned int op,
  		      unsigned long start_time)
  {
  	__part_end_io_acct(&disk->part0, op, start_time);
  }
956d510ee   Christoph Hellwig   block: add disk/b...
1321
  EXPORT_SYMBOL(disk_end_io_acct);
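  
  /*
   * Illustrative sketch (not part of blk-core.c): a bio-based driver that
   * bypasses blk-mq accounting its I/O with the disk_start_io_acct()/
   * disk_end_io_acct() pair.  my_handle_bio() is a hypothetical
   * synchronous handler.
   */
  static blk_qc_t my_submit_bio(struct bio *bio)
  {
  	struct gendisk *disk = bio->bi_disk;
  	unsigned long start;
  
  	start = disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio));
  	my_handle_bio(bio);			/* do the actual work */
  	disk_end_io_acct(disk, bio_op(bio), start);
  
  	bio_endio(bio);
  	return BLK_QC_T_NONE;
  }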
320ae51fe   Jens Axboe   blk-mq: new multi...
1322

ef71de8b1   Christoph Hellwig   block: add a blk_...
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
  /*
   * Steal bios from a request and add them to a bio list.
   * The request must not have been partially completed before.
   */
  void blk_steal_bios(struct bio_list *list, struct request *rq)
  {
  	if (rq->bio) {
  		if (list->tail)
  			list->tail->bi_next = rq->bio;
  		else
  			list->head = rq->bio;
  		list->tail = rq->biotail;
  
  		rq->bio = NULL;
  		rq->biotail = NULL;
  	}
  
  	rq->__data_len = 0;
  }
  EXPORT_SYMBOL_GPL(blk_steal_bios);
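  
  /*
   * Illustrative sketch (not part of blk-core.c): failing over a request
   * by stealing its bios and ending the now-empty request, in the style of
   * NVMe multipath.  my_pick_alternate_bdev() is hypothetical.
   */
  static void my_failover_rq(struct request *rq)
  {
  	struct bio_list stolen;
  	struct bio *bio;
  
  	bio_list_init(&stolen);
  	blk_steal_bios(&stolen, rq);	/* rq->bio is now NULL */
  	blk_mq_end_request(rq, BLK_STS_OK);
  
  	/* redirect each stolen bio to another path and resubmit it */
  	while ((bio = bio_list_pop(&stolen)) != NULL) {
  		bio_set_dev(bio, my_pick_alternate_bdev());
  		submit_bio_noacct(bio);
  	}
  }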
9934c8c04   Tejun Heo   block: implement ...
1343
  /**
2e60e0229   Tejun Heo   block: clean up r...
1344
   * blk_update_request - Special helper function for request stacking drivers
8ebf97560   Randy Dunlap   block: fix kernel...
1345
   * @req:      the request being processed
2a842acab   Christoph Hellwig   block: introduce ...
1346
   * @error:    block status code
8ebf97560   Randy Dunlap   block: fix kernel...
1347
   * @nr_bytes: number of bytes to complete @req
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1348
1349
   *
   * Description:
8ebf97560   Randy Dunlap   block: fix kernel...
1350
1351
1352
   *     Ends I/O on a number of bytes attached to @req, but doesn't complete
   *     the request structure even if @req has no bytes left over.
   *     If @req has leftover bytes, sets it up for the next range of segments.
2e60e0229   Tejun Heo   block: clean up r...
1353
1354
1355
   *
   *     This special helper function is only for request stacking drivers
   *     (e.g. request-based dm) so that they can handle partial completion.
3a211b715   Pavel Begunkov   blk-core: Remove ...
1356
   *     Actual device drivers should use blk_mq_end_request instead.
2e60e0229   Tejun Heo   block: clean up r...
1357
1358
1359
   *
   *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
   *     %false return from this function.
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1360
   *
1954e9a99   Bart Van Assche   block: Document h...
1361
1362
1363
1364
   * Note:
   *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
   *	blk_rq_bytes() and blk_update_request().
   *
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1365
   * Return:
2e60e0229   Tejun Heo   block: clean up r...
1366
1367
   *     %false - this request doesn't have any more data
   *     %true  - this request has more data
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1368
   **/
2a842acab   Christoph Hellwig   block: introduce ...
1369
1370
  bool blk_update_request(struct request *req, blk_status_t error,
  		unsigned int nr_bytes)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1371
  {
f79ea4161   Kent Overstreet   block: Refactor b...
1372
  	int total_bytes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1373

2a842acab   Christoph Hellwig   block: introduce ...
1374
  	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
4a0efdc93   Hannes Reinecke   block: misplaced ...
1375

2e60e0229   Tejun Heo   block: clean up r...
1376
1377
  	if (!req->bio)
  		return false;
54d4e6ab9   Max Gurtovoy   block: centralize...
1378
1379
1380
1381
1382
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
  	    error == BLK_STS_OK)
  		req->q->integrity.profile->complete_fn(req, nr_bytes);
  #endif
2a842acab   Christoph Hellwig   block: introduce ...
1383
1384
  	if (unlikely(error && !blk_rq_is_passthrough(req) &&
  		     !(req->rq_flags & RQF_QUIET)))
178cc590e   Christoph Hellwig   block: improve pr...
1385
  		print_req_error(req, error, __func__);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1386

bc58ba946   Jens Axboe   block: add sysfs ...
1387
  	blk_account_io_completion(req, nr_bytes);
d72d904a5   Jens Axboe   [BLOCK] Update re...
1388

f79ea4161   Kent Overstreet   block: Refactor b...
1389
1390
1391
  	total_bytes = 0;
  	while (req->bio) {
  		struct bio *bio = req->bio;
4f024f379   Kent Overstreet   block: Abstract o...
1392
  		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1393

9c24c10a2   Bart Van Assche   Revert "block: Ad...
1394
  		if (bio_bytes == bio->bi_iter.bi_size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1395
  			req->bio = bio->bi_next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1396

fbbaf700e   NeilBrown   block: trace comp...
1397
1398
  		/* Completion has already been traced */
  		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
f79ea4161   Kent Overstreet   block: Refactor b...
1399
  		req_bio_endio(req, bio, bio_bytes, error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1400

f79ea4161   Kent Overstreet   block: Refactor b...
1401
1402
  		total_bytes += bio_bytes;
  		nr_bytes -= bio_bytes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1403

f79ea4161   Kent Overstreet   block: Refactor b...
1404
1405
  		if (!nr_bytes)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1406
1407
1408
1409
1410
  	}
  
  	/*
  	 * completely done
  	 */
2e60e0229   Tejun Heo   block: clean up r...
1411
1412
1413
1414
1415
1416
  	if (!req->bio) {
  		/*
  		 * Reset counters so that the request stacking driver
  		 * can find how many bytes remain in the request
  		 * later.
  		 */
a2dec7b36   Tejun Heo   block: hide reque...
1417
  		req->__data_len = 0;
2e60e0229   Tejun Heo   block: clean up r...
1418
1419
  		return false;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1420

a2dec7b36   Tejun Heo   block: hide reque...
1421
  	req->__data_len -= total_bytes;
2e46e8b27   Tejun Heo   block: drop reque...
1422
1423
  
  	/* update sector only for requests with clear definition of sector */
57292b58d   Christoph Hellwig   block: introduce ...
1424
  	if (!blk_rq_is_passthrough(req))
a2dec7b36   Tejun Heo   block: hide reque...
1425
  		req->__sector += total_bytes >> 9;
2e46e8b27   Tejun Heo   block: drop reque...
1426

80a761fd3   Tejun Heo   block: implement ...
1427
  	/* mixed attributes always follow the first bio */
e80640213   Christoph Hellwig   block: split out ...
1428
  	if (req->rq_flags & RQF_MIXED_MERGE) {
80a761fd3   Tejun Heo   block: implement ...
1429
  		req->cmd_flags &= ~REQ_FAILFAST_MASK;
1eff9d322   Jens Axboe   block: rename bio...
1430
  		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
80a761fd3   Tejun Heo   block: implement ...
1431
  	}
ed6565e73   Christoph Hellwig   block: handle par...
1432
1433
1434
1435
1436
1437
1438
1439
1440
  	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
  		/*
  		 * If the total number of sectors is less than the first segment
  		 * size, something has gone terribly wrong.
  		 */
  		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
  			blk_dump_rq_flags(req, "request botched");
  			req->__data_len = blk_rq_cur_bytes(req);
  		}
2e46e8b27   Tejun Heo   block: drop reque...
1441

ed6565e73   Christoph Hellwig   block: handle par...
1442
  		/* recalculate the number of segments */
e9cd19c0c   Christoph Hellwig   block: simplify b...
1443
  		req->nr_phys_segments = blk_recalc_rq_segments(req);
ed6565e73   Christoph Hellwig   block: handle par...
1444
  	}
2e46e8b27   Tejun Heo   block: drop reque...
1445

2e60e0229   Tejun Heo   block: clean up r...
1446
  	return true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1447
  }
2e60e0229   Tejun Heo   block: clean up r...
1448
  EXPORT_SYMBOL_GPL(blk_update_request);
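  
  /*
   * Illustrative sketch (not part of blk-core.c): the two common calling
   * patterns.  my_end_request() mirrors what blk_mq_end_request() does;
   * my_partial_complete() shows the stacking-driver case.
   */
  static void my_end_request(struct request *rq, blk_status_t error)
  {
  	/* completing blk_rq_bytes(rq) is guaranteed to return %false */
  	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
  		BUG();
  	__blk_mq_end_request(rq, error);
  }
  
  static bool my_partial_complete(struct request *rq, unsigned int done)
  {
  	/* %true: bytes remain and @rq now describes the unfinished tail */
  	return blk_update_request(rq, BLK_STS_OK, done);
  }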
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1449

2d4dc890b   Ilya Loginov   block: add helper...
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
  /**
   * rq_flush_dcache_pages - Helper function to flush all pages in a request
   * @rq: the request to be flushed
   *
   * Description:
   *     Flush all pages in @rq.
   */
  void rq_flush_dcache_pages(struct request *rq)
  {
  	struct req_iterator iter;
7988613b0   Kent Overstreet   block: Convert bi...
1461
  	struct bio_vec bvec;
2d4dc890b   Ilya Loginov   block: add helper...
1462
1463
  
  	rq_for_each_segment(bvec, rq, iter)
7988613b0   Kent Overstreet   block: Convert bi...
1464
  		flush_dcache_page(bvec.bv_page);
2d4dc890b   Ilya Loginov   block: add helper...
1465
1466
1467
  }
  EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
  #endif
ef9e3facd   Kiyoshi Ueda   block: add lld bu...
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
  /**
   * blk_lld_busy - Check if underlying low-level drivers of a device are busy
   * @q : the queue of the device being checked
   *
   * Description:
   *    Check if underlying low-level drivers of a device are busy.
   *    If the drivers want to export their busy state, they must set own
   *    exporting function using blk_queue_lld_busy() first.
   *
   *    Basically, this function is used only by request stacking drivers
   *    to stop dispatching requests to underlying devices when underlying
   *    devices are busy.  This behavior improves I/O merging on the queue
   *    of the request stacking driver and prevents I/O throughput regressions
   *    under bursty I/O load.
   *
   * Return:
   *    0 - Not busy (The request stacking driver should dispatch the request)
   *    1 - Busy (The request stacking driver should stop dispatching requests)
   */
  int blk_lld_busy(struct request_queue *q)
  {
344e9ffcb   Jens Axboe   block: add queue_...
1489
  	if (queue_is_mq(q) && q->mq_ops->busy)
9ba20527f   Jens Axboe   blk-mq: provide m...
1490
  		return q->mq_ops->busy(q);
ef9e3facd   Kiyoshi Ueda   block: add lld bu...
1491
1492
1493
1494
  
  	return 0;
  }
  EXPORT_SYMBOL_GPL(blk_lld_busy);
78d8e58a0   Mike Snitzer   Revert "block, dm...
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
  /**
   * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
   * @rq: the clone request to be cleaned up
   *
   * Description:
   *     Free all bios in @rq for a cloned request.
   */
  void blk_rq_unprep_clone(struct request *rq)
  {
  	struct bio *bio;
  
  	while ((bio = rq->bio) != NULL) {
  		rq->bio = bio->bi_next;
  
  		bio_put(bio);
  	}
  }
  EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
78d8e58a0   Mike Snitzer   Revert "block, dm...
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
  /**
   * blk_rq_prep_clone - Helper function to setup clone request
   * @rq: the request to be setup
   * @rq_src: original request to be cloned
   * @bs: bio_set that bios for clone are allocated from
   * @gfp_mask: memory allocation mask for bio
   * @bio_ctr: setup function to be called for each clone bio.
   *           Returns %0 for success, non %0 for failure.
   * @data: private data to be passed to @bio_ctr
   *
   * Description:
   *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
78d8e58a0   Mike Snitzer   Revert "block, dm...
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
   *     Also, pages which the original bios are pointing to are not copied
   *     and the cloned bios just point to the same pages.
   *     So cloned bios must be completed before original bios, which means
   *     the caller must complete @rq before @rq_src.
   */
  int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
  		      struct bio_set *bs, gfp_t gfp_mask,
  		      int (*bio_ctr)(struct bio *, struct bio *, void *),
  		      void *data)
  {
  	struct bio *bio, *bio_src;
  
  	if (!bs)
f4f8154a0   Kent Overstreet   block: Use bioset...
1538
  		bs = &fs_bio_set;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
  
  	__rq_for_each_bio(bio_src, rq_src) {
  		bio = bio_clone_fast(bio_src, gfp_mask, bs);
  		if (!bio)
  			goto free_and_out;
  
  		if (bio_ctr && bio_ctr(bio, bio_src, data))
  			goto free_and_out;
  
  		if (rq->bio) {
  			rq->biotail->bi_next = bio;
  			rq->biotail = bio;
93f221ae0   Eric Biggers   block: make blk_c...
1551
  		} else {
78d8e58a0   Mike Snitzer   Revert "block, dm...
1552
  			rq->bio = rq->biotail = bio;
93f221ae0   Eric Biggers   block: make blk_c...
1553
1554
  		}
  		bio = NULL;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1555
  	}
361301a22   Guoqing Jiang   block: cleanup fo...
1556
1557
1558
1559
1560
1561
1562
1563
1564
  	/* Copy attributes of the original request to the clone request. */
  	rq->__sector = blk_rq_pos(rq_src);
  	rq->__data_len = blk_rq_bytes(rq_src);
  	if (rq_src->rq_flags & RQF_SPECIAL_PAYLOAD) {
  		rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
  		rq->special_vec = rq_src->special_vec;
  	}
  	rq->nr_phys_segments = rq_src->nr_phys_segments;
  	rq->ioprio = rq_src->ioprio;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1565

93f221ae0   Eric Biggers   block: make blk_c...
1566
1567
  	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
  		goto free_and_out;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1568
1569
1570
1571
1572
1573
1574
1575
1576
  
  	return 0;
  
  free_and_out:
  	if (bio)
  		bio_put(bio);
  	blk_rq_unprep_clone(rq);
  
  	return -ENOMEM;
b0fd271d5   Kiyoshi Ueda   block: add reques...
1577
1578
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
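  
  /*
   * Illustrative sketch (not part of blk-core.c): the clone lifecycle as a
   * request stacking driver might drive it - prepare, dispatch, clean up.
   * my_bioset and my_clone_end_io() are hypothetical.
   */
  static blk_status_t my_clone_and_issue(struct request *rq,
  				       struct request *clone)
  {
  	blk_status_t ret;
  
  	if (blk_rq_prep_clone(clone, rq, &my_bioset, GFP_ATOMIC, NULL, NULL))
  		return BLK_STS_RESOURCE;
  
  	clone->end_io = my_clone_end_io;	/* completes @rq from @clone */
  	clone->end_io_data = rq;
  
  	ret = blk_insert_cloned_request(clone->q, clone);
  	if (ret != BLK_STS_OK)
  		blk_rq_unprep_clone(clone);	/* free the cloned bios */
  	return ret;
  }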
59c3d45e4   Jens Axboe   block: remove 'q'...
1579
  int kblockd_schedule_work(struct work_struct *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1580
1581
1582
  {
  	return queue_work(kblockd_workqueue, work);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1583
  EXPORT_SYMBOL(kblockd_schedule_work);
818cd1cba   Jens Axboe   block: add kblock...
1584
1585
1586
1587
1588
1589
  int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
  				unsigned long delay)
  {
  	return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
  }
  EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
75df71362   Suresh Jayaraman   block: document b...
1590
1591
1592
1593
1594
  /**
   * blk_start_plug - initialize blk_plug and track it inside the task_struct
   * @plug:	The &struct blk_plug that needs to be initialized
   *
   * Description:
40405851a   Jeff Moyer   block: clarify do...
1595
1596
1597
1598
1599
1600
1601
1602
1603
   *   blk_start_plug() indicates to the block layer an intent by the caller
   *   to submit multiple I/O requests in a batch.  The block layer may use
   *   this hint to defer submitting I/Os from the caller until blk_finish_plug()
   *   is called.  However, the block layer may choose to submit requests
   *   before a call to blk_finish_plug() if the number of queued I/Os
   *   exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than
   *   %BLK_PLUG_FLUSH_SIZE.  The queued I/Os may also be submitted early if
   *   the task schedules (see below).
   *
75df71362   Suresh Jayaraman   block: document b...
1604
1605
1606
1607
1608
1609
1610
1611
1612
   *   Tracking blk_plug inside the task_struct will help with auto-flushing the
   *   pending I/O should the task end up blocking between blk_start_plug() and
   *   blk_finish_plug(). This is important from a performance perspective, but
   *   also ensures that we don't deadlock. For instance, if the task is blocking
   *   for a memory allocation, memory reclaim could end up wanting to free a
   *   page belonging to that request that is currently residing in our private
   *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
   *   this kind of deadlock.
   */
73c101011   Jens Axboe   block: initial pa...
1613
1614
1615
  void blk_start_plug(struct blk_plug *plug)
  {
  	struct task_struct *tsk = current;
dd6cf3e18   Shaohua Li   blk: clean up plug
1616
1617
1618
1619
1620
  	/*
  	 * If this is a nested plug, don't actually assign it.
  	 */
  	if (tsk->plug)
  		return;
320ae51fe   Jens Axboe   blk-mq: new multi...
1621
  	INIT_LIST_HEAD(&plug->mq_list);
048c9374a   NeilBrown   block: Enhance ne...
1622
  	INIT_LIST_HEAD(&plug->cb_list);
5f0ed774e   Jens Axboe   block: sum reques...
1623
  	plug->rq_count = 0;
ce5b009cf   Jens Axboe   block: improve lo...
1624
  	plug->multiple_queues = false;
5a473e831   Jens Axboe   block: provide pl...
1625
  	plug->nowait = false;
5f0ed774e   Jens Axboe   block: sum reques...
1626

73c101011   Jens Axboe   block: initial pa...
1627
  	/*
dd6cf3e18   Shaohua Li   blk: clean up plug
1628
1629
  	 * Store ordering should not be needed here, since a potential
  	 * preempt will imply a full memory barrier
73c101011   Jens Axboe   block: initial pa...
1630
  	 */
dd6cf3e18   Shaohua Li   blk: clean up plug
1631
  	tsk->plug = plug;
73c101011   Jens Axboe   block: initial pa...
1632
1633
  }
  EXPORT_SYMBOL(blk_start_plug);
74018dc30   NeilBrown   blk: pass from_sc...
1634
  static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
048c9374a   NeilBrown   block: Enhance ne...
1635
1636
  {
  	LIST_HEAD(callbacks);
2a7d5559b   Shaohua Li   block: stack unplug
1637
1638
  	while (!list_empty(&plug->cb_list)) {
  		list_splice_init(&plug->cb_list, &callbacks);
048c9374a   NeilBrown   block: Enhance ne...
1639

2a7d5559b   Shaohua Li   block: stack unplug
1640
1641
  		while (!list_empty(&callbacks)) {
  			struct blk_plug_cb *cb = list_first_entry(&callbacks,
048c9374a   NeilBrown   block: Enhance ne...
1642
1643
  							  struct blk_plug_cb,
  							  list);
2a7d5559b   Shaohua Li   block: stack unplug
1644
  			list_del(&cb->list);
74018dc30   NeilBrown   blk: pass from_sc...
1645
  			cb->callback(cb, from_schedule);
2a7d5559b   Shaohua Li   block: stack unplug
1646
  		}
048c9374a   NeilBrown   block: Enhance ne...
1647
1648
  	}
  }
9cbb17508   NeilBrown   blk: centralize n...
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
  struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
  				      int size)
  {
  	struct blk_plug *plug = current->plug;
  	struct blk_plug_cb *cb;
  
  	if (!plug)
  		return NULL;
  
  	list_for_each_entry(cb, &plug->cb_list, list)
  		if (cb->callback == unplug && cb->data == data)
  			return cb;
  
  	/* Not currently on the callback list */
  	BUG_ON(size < sizeof(*cb));
  	cb = kzalloc(size, GFP_ATOMIC);
  	if (cb) {
  		cb->data = data;
  		cb->callback = unplug;
  		list_add(&cb->list, &plug->cb_list);
  	}
  	return cb;
  }
  EXPORT_SYMBOL(blk_check_plugged);
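  
  /*
   * Illustrative sketch (not part of blk-core.c): batching bios under the
   * current plug in the style of md/raid.  struct my_plug_cb and struct
   * my_dev are hypothetical; the generic callback must be the first member
   * so container_of() works, and the unplug callback is responsible for
   * freeing the allocation made by blk_check_plugged().
   */
  struct my_plug_cb {
  	struct blk_plug_cb cb;
  	struct bio_list pending;
  };
  
  static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
  {
  	struct my_plug_cb *mcb = container_of(cb, struct my_plug_cb, cb);
  	struct bio *bio;
  
  	while ((bio = bio_list_pop(&mcb->pending)) != NULL)
  		submit_bio_noacct(bio);
  	kfree(mcb);
  }
  
  static bool my_defer_bio(struct my_dev *dev, struct bio *bio)
  {
  	struct blk_plug_cb *cb;
  
  	cb = blk_check_plugged(my_unplug, dev, sizeof(struct my_plug_cb));
  	if (!cb)
  		return false;	/* no plug active: caller issues directly */
  
  	bio_list_add(&container_of(cb, struct my_plug_cb, cb)->pending, bio);
  	return true;
  }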
49cac01e1   Jens Axboe   block: make unplu...
1673
  void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
73c101011   Jens Axboe   block: initial pa...
1674
  {
74018dc30   NeilBrown   blk: pass from_sc...
1675
  	flush_plug_callbacks(plug, from_schedule);
320ae51fe   Jens Axboe   blk-mq: new multi...
1676
1677
1678
  
  	if (!list_empty(&plug->mq_list))
  		blk_mq_flush_plug_list(plug, from_schedule);
73c101011   Jens Axboe   block: initial pa...
1679
  }
73c101011   Jens Axboe   block: initial pa...
1680

40405851a   Jeff Moyer   block: clarify do...
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
  /**
   * blk_finish_plug - mark the end of a batch of submitted I/O
   * @plug:	The &struct blk_plug passed to blk_start_plug()
   *
   * Description:
   * Indicate that a batch of I/O submissions is complete.  This function
   * must be paired with an initial call to blk_start_plug().  The intent
   * is to allow the block layer to optimize I/O submission.  See the
   * documentation for blk_start_plug() for more information.
   */
73c101011   Jens Axboe   block: initial pa...
1691
1692
  void blk_finish_plug(struct blk_plug *plug)
  {
dd6cf3e18   Shaohua Li   blk: clean up plug
1693
1694
  	if (plug != current->plug)
  		return;
f6603783f   Jens Axboe   block: only force...
1695
  	blk_flush_plug_list(plug, false);
73c101011   Jens Axboe   block: initial pa...
1696

dd6cf3e18   Shaohua Li   blk: clean up plug
1697
  	current->plug = NULL;
73c101011   Jens Axboe   block: initial pa...
1698
  }
88b996cd0   Christoph Hellwig   block: cleanup th...
1699
  EXPORT_SYMBOL(blk_finish_plug);
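  
  /*
   * Illustrative sketch (not part of blk-core.c): the canonical on-stack
   * plugging pattern around a batch of submissions.
   */
  static void my_submit_batch(struct bio **bios, int nr)
  {
  	struct blk_plug plug;
  	int i;
  
  	blk_start_plug(&plug);
  	for (i = 0; i < nr; i++)
  		submit_bio(bios[i]);	/* may be held back and merged */
  	blk_finish_plug(&plug);		/* flush anything still plugged */
  }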
73c101011   Jens Axboe   block: initial pa...
1700

71ac860af   Ming Lei   block: move blk_i...
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
  void blk_io_schedule(void)
  {
  	/* Prevent hang_check timer from firing at us during very long I/O */
  	unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
  
  	if (timeout)
  		io_schedule_timeout(timeout);
  	else
  		io_schedule();
  }
  EXPORT_SYMBOL_GPL(blk_io_schedule);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1712
1713
  int __init blk_dev_init(void)
  {
ef295ecf0   Christoph Hellwig   block: better op ...
1714
1715
  	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
c593642c8   Pankaj Bharadiya   treewide: Use siz...
1716
  			sizeof_field(struct request, cmd_flags));
ef295ecf0   Christoph Hellwig   block: better op ...
1717
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
c593642c8   Pankaj Bharadiya   treewide: Use siz...
1718
  			sizeof_field(struct bio, bi_opf));
9eb55b030   Nikanth Karthikesan   block: catch tryi...
1719

89b90be2d   Tejun Heo   block: make kbloc...
1720
1721
  	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
  	kblockd_workqueue = alloc_workqueue("kblockd",
28747fcd2   Matias Bjørling   block: remove WQ_...
1722
  					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1723
1724
1725
  	if (!kblockd_workqueue)
  		panic("Failed to create kblockd
  ");
c2789bd40   Ilya Dryomov   block: rename req...
1726
  	blk_requestq_cachep = kmem_cache_create("request_queue",
165125e1e   Jens Axboe   [BLOCK] Get rid o...
1727
  			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1728

18fbda91c   Omar Sandoval   block: use same b...
1729
  	blk_debugfs_root = debugfs_create_dir("block", NULL);
18fbda91c   Omar Sandoval   block: use same b...
1730

d38ecf935   Jens Axboe   io context sharin...
1731
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1732
  }