block/blk-core.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 *	-  July 2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - May 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/t10-pi.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-rq-qos.h"

#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
#endif

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);

DEFINE_IDA(blk_queue_ida);

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_set);

/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	clear_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL(blk_queue_flag_clear);

/**
 * blk_queue_flag_test_and_set - atomically test and set a queue flag
 * @flag: flag to be set
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was already set.
 */
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
{
	return test_and_set_bit(flag, &q->queue_flags);
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
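
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): the test-and-set variant lets one-time setup run exactly once even
 * when several contexts race on the same queue. The helper
 * do_one_time_setup() is hypothetical.
 *
 *	if (!blk_queue_flag_test_and_set(QUEUE_FLAG_REGISTERED, q))
 *		do_one_time_setup(q);
 */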

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->tag = -1;
	rq->internal_tag = -1;
	rq->start_time_ns = ktime_get_ns();
	rq->part = NULL;
	refcount_set(&rq->ref, 1);
}
EXPORT_SYMBOL(blk_rq_init);

#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
	REQ_OP_NAME(READ),
	REQ_OP_NAME(WRITE),
	REQ_OP_NAME(FLUSH),
	REQ_OP_NAME(DISCARD),
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
	REQ_OP_NAME(SCSI_OUT),
	REQ_OP_NAME(DRV_IN),
	REQ_OP_NAME(DRV_OUT),
};
#undef REQ_OP_NAME

/**
 * blk_op_str - Return string XXX in the REQ_OP_XXX.
 * @op: REQ_OP_XXX.
 *
 * Description: Centralized block layer function to convert REQ_OP_XXX into
 * string format. Useful for debugging and tracing bios or requests. For an
 * invalid REQ_OP_XXX it returns the string "UNKNOWN".
 */
inline const char *blk_op_str(unsigned int op)
{
	const char *op_str = "UNKNOWN";

	if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op])
		op_str = blk_op_name[op];

	return op_str;
}
EXPORT_SYMBOL_GPL(blk_op_str);
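
/*
 * Illustrative usage sketch (editor's addition): blk_op_str() is intended
 * for diagnostics such as print_req_error() further down in this file,
 * e.g.:
 *
 *	pr_info("op 0x%x:(%s)\n", req_op(req), blk_op_str(req_op(req)));
 */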

static const struct {
	int		errno;
	const char	*name;
} blk_errors[] = {
	[BLK_STS_OK]		= { 0,		"" },
	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
	[BLK_STS_DM_REQUEUE]	= { -EREMCHG, "dm internal retry" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};

blk_status_t errno_to_blk_status(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
		if (blk_errors[i].errno == errno)
			return (__force blk_status_t)i;
	}

	return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(errno_to_blk_status);

int blk_status_to_errno(blk_status_t status)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return -EIO;
	return blk_errors[idx].errno;
}
EXPORT_SYMBOL_GPL(blk_status_to_errno);
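
/*
 * Worked example (editor's addition): the two helpers above round-trip
 * through the blk_errors[] table. errno_to_blk_status(-ENOSPC) yields
 * BLK_STS_NOSPC, and blk_status_to_errno(BLK_STS_NOSPC) returns -ENOSPC
 * again; an errno without a dedicated entry, say -EINVAL, falls back to
 * BLK_STS_IOERR and comes back out as -EIO.
 */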

static void print_req_error(struct request *req, blk_status_t status,
		const char *caller)
{
	int idx = (__force int)status;

	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
		return;

	printk_ratelimited(KERN_ERR
		"%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x "
		"phys_seg %u prio class %u\n",
		caller, blk_errors[idx].name,
		req->rq_disk ? req->rq_disk->disk_name : "?",
		blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
		req->cmd_flags & ~REQ_OP_MASK,
		req->nr_phys_segments,
		IOPRIO_PRIO_CLASS(req->ioprio));
}

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
{
	if (error)
		bio->bi_status = error;

	if (unlikely(rq->rq_flags & RQF_QUIET))
		bio_set_flag(bio, BIO_QUIET);

	bio_advance(bio, nbytes);

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
		bio_endio(bio);
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?",
		(unsigned long long) rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
	       rq->bio, rq->biotail, blk_rq_bytes(rq));
}
EXPORT_SYMBOL(blk_dump_rq_flags);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with queue lock initialized.
 *
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->timeout_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * blk_set_pm_only - increment pm_only counter
 * @q: request queue pointer
 */
void blk_set_pm_only(struct request_queue *q)
{
	atomic_inc(&q->pm_only);
}
EXPORT_SYMBOL_GPL(blk_set_pm_only);

void blk_clear_pm_only(struct request_queue *q)
{
	int pm_only;

	pm_only = atomic_dec_return(&q->pm_only);
	WARN_ON_ONCE(pm_only < 0);
	if (pm_only == 0)
		wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
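
/*
 * Illustrative sketch (editor's addition): blk_set_pm_only() and
 * blk_clear_pm_only() are used as a balanced pair around a runtime
 * power-management transition, roughly:
 *
 *	blk_set_pm_only(q);
 *	... suspend the device; only BLK_MQ_REQ_PREEMPT requests pass ...
 *	blk_clear_pm_only(q);
 *
 * The final clear wakes q->mq_freeze_wq so that waiters in
 * blk_queue_enter() re-examine the pm_only state.
 */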

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);

void blk_set_queue_dying(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);

	/*
	 * When queue DYING flag is set, we need to block new req
	 * entering queue, so we call blk_freeze_queue_start() to
	 * prevent I/O from crossing blk_queue_enter().
	 */
	blk_freeze_queue_start(q);
	if (queue_is_mq(q))
		blk_mq_wake_waiters(q);

	/* Make blk_queue_enter() reexamine the DYING flag. */
	wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);

/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 */
void blk_cleanup_queue(struct request_queue *q)
{
	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
	 * prevent blk_mq_run_hw_queues() from accessing the hardware queues
	 * after draining has finished.
	 */
	blk_freeze_queue(q);

	rq_qos_exit(q);
	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);

	/* for synchronous bio-based driver finish in-flight integrity i/o */
	blk_flush_integrity();

	/* @q won't process any more requests, flush async actions */
	del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
	blk_sync_queue(q);
	if (queue_is_mq(q))
		blk_mq_exit_queue(q);

	/*
	 * In theory, request pool of sched_tags belongs to request queue.
	 * However, the current implementation requires tag_set for freeing
	 * requests, so free the pool now.
	 *
	 * Queue has become frozen, there can't be any in-queue requests, so
	 * it is safe to free requests now.
	 */
	mutex_lock(&q->sysfs_lock);
	if (q->elevator)
		blk_mq_sched_free_requests(q);
	mutex_unlock(&q->sysfs_lock);

	percpu_ref_exit(&q->q_usage_counter);

	/* @q is and will stay empty, shutdown and put */
	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);

/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
 */
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
	const bool pm = flags & BLK_MQ_REQ_PREEMPT;

	while (true) {
		bool success = false;

		rcu_read_lock();
		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
			/*
			 * The code that increments the pm_only counter is
			 * responsible for ensuring that that counter is
			 * globally visible before the queue is unfrozen.
			 */
			if (pm || !blk_queue_pm_only(q)) {
				success = true;
			} else {
				percpu_ref_put(&q->q_usage_counter);
			}
		}
		rcu_read_unlock();

		if (success)
			return 0;
		if (flags & BLK_MQ_REQ_NOWAIT)
			return -EBUSY;

		/*
		 * This smp_rmb() pairs with the barrier in
		 * blk_freeze_queue_start(): we need to order reading the
		 * __PERCPU_REF_DEAD flag of .q_usage_counter against
		 * reading .mq_freeze_depth or the queue dying flag,
		 * otherwise the following wait may never return if the
		 * two reads are reordered.
		 */
		smp_rmb();
		wait_event(q->mq_freeze_wq,
			   (!q->mq_freeze_depth &&
			    (pm || (blk_pm_request_resume(q),
				    !blk_queue_pm_only(q)))) ||
			   blk_queue_dying(q));
		if (blk_queue_dying(q))
			return -ENODEV;
	}
}

void blk_queue_exit(struct request_queue *q)
{
	percpu_ref_put(&q->q_usage_counter);
}

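/*
 * Illustrative sketch (editor's addition): blk_queue_enter() and
 * blk_queue_exit() bracket work that must not race with queue freezing
 * or teardown:
 *
 *	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT) == 0) {
 *		... q_usage_counter is held, the queue cannot go away ...
 *		blk_queue_exit(q);
 *	}
 *
 * With BLK_MQ_REQ_NOWAIT the call fails with -EBUSY instead of sleeping
 * while the queue is frozen or pm_only; without it, the call sleeps and
 * returns -ENODEV if the queue dies in the meantime.
 */
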
static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
	struct request_queue *q =
		container_of(ref, struct request_queue, q_usage_counter);

	wake_up_all(&q->mq_freeze_wq);
}

static void blk_rq_timed_out_timer(struct timer_list *t)
{
	struct request_queue *q = from_timer(q, t, timeout);

	kblockd_schedule_work(&q->timeout_work);
}

static void blk_timeout_work(struct work_struct *work)
{
}

/**
 * blk_alloc_queue_node - allocate a request queue
 * @gfp_mask: memory allocation flags
 * @node_id: NUMA node to allocate memory from
 */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int ret;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->last_merge = NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
	if (q->id < 0)
		goto fail_q;

	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto fail_id;

	q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
	if (!q->backing_dev_info)
		goto fail_split;

	q->stats = blk_alloc_queue_stats();
	if (!q->stats)
		goto fail_stats;

	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->backing_dev_info->name = "block";
	q->node = node_id;

	timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
		    laptop_mode_timer_fn, 0);
	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
	INIT_WORK(&q->timeout_work, blk_timeout_work);
	INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
	INIT_LIST_HEAD(&q->blkg_list);
#endif

	kobject_init(&q->kobj, &blk_queue_ktype);

#ifdef CONFIG_BLK_DEV_IO_TRACE
	mutex_init(&q->blk_trace_mutex);
#endif
	mutex_init(&q->sysfs_lock);
	mutex_init(&q->sysfs_dir_lock);
	spin_lock_init(&q->queue_lock);

	init_waitqueue_head(&q->mq_freeze_wq);
	mutex_init(&q->mq_freeze_lock);

	/*
	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
	 * See blk_register_queue() for details.
	 */
	if (percpu_ref_init(&q->q_usage_counter,
				blk_queue_usage_counter_release,
				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
		goto fail_bdi;

	if (blkcg_init_queue(q))
		goto fail_ref;

	return q;

fail_ref:
	percpu_ref_exit(&q->q_usage_counter);
fail_bdi:
	blk_free_queue_stats(q->stats);
fail_stats:
	bdi_put(q->backing_dev_info);
fail_split:
	bioset_exit(&q->bio_split);
fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

bool blk_get_queue(struct request_queue *q)
{
	if (likely(!blk_queue_dying(q))) {
		__blk_get_queue(q);
		return true;
	}

	return false;
}
EXPORT_SYMBOL(blk_get_queue);

/**
 * blk_get_request - allocate a request
 * @q: request queue to allocate a request for
 * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
 * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
 */
struct request *blk_get_request(struct request_queue *q, unsigned int op,
				blk_mq_req_flags_t flags)
{
	struct request *req;

	WARN_ON_ONCE(op & REQ_NOWAIT);
	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));

	req = blk_mq_alloc_request(q, op, flags);
	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
		q->mq_ops->initialize_rq_fn(req);

	return req;
}
EXPORT_SYMBOL(blk_get_request);

void blk_put_request(struct request *req)
{
	blk_mq_free_request(req);
}
EXPORT_SYMBOL(blk_put_request);
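
/*
 * Illustrative sketch (editor's addition): a typical caller pairs
 * blk_get_request() with blk_put_request(), e.g. for a driver-private
 * command:
 *
 *	struct request *rq;
 *
 *	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	... set up and execute the request, e.g. via blk_execute_rq() ...
 *	blk_put_request(rq);
 */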

bool bio_attempt_back_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_backmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	blk_account_io_start(req, false);
	return true;
}

bool bio_attempt_front_merge(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return false;

	trace_block_bio_frontmerge(req->q, req, bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	blk_account_io_start(req, false);
	return true;
}

bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_start(req, false);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL)
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added to the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside the queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **same_queue_rq)
{
	struct blk_plug *plug;
	struct request *rq;
	struct list_head *plug_list;

	plug = blk_mq_plug(q, bio);
	if (!plug)
		return false;

	plug_list = &plug->mq_list;

	list_for_each_entry_reverse(rq, plug_list, queuelist) {
		bool merged = false;

		if (rq->q == q && same_queue_rq) {
			/*
			 * Only the blk-mq multiple hardware queues case
			 * checks the rq in the same queue; there should be
			 * only one such rq in a queue.
			 */
			*same_queue_rq = rq;
		}

		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
			continue;

		switch (blk_try_merge(rq, bio)) {
		case ELEVATOR_BACK_MERGE:
			merged = bio_attempt_back_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_FRONT_MERGE:
			merged = bio_attempt_front_merge(rq, bio, nr_segs);
			break;
		case ELEVATOR_DISCARD_MERGE:
			merged = bio_attempt_discard_merge(q, rq, bio);
			break;
		default:
			break;
		}

		if (merged)
			return true;
	}

	return false;
}
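
/*
 * Illustrative sketch (editor's addition): the plug list scanned above is
 * populated between blk_start_plug() and blk_finish_plug() in the
 * submitting task:
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	... submit several bios; contiguous ones may merge here ...
 *	blk_finish_plug(&plug);
 */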

static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "attempt to access beyond end of device\n");
	printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
			bio_devname(bio, b), bio->bi_opf,
			(unsigned long long)bio_end_sector(bio),
			(long long)maxsector);
}

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);
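
/*
 * Editor's note: as with other fault-injection attributes, the boot
 * parameter parsed above takes the form
 *
 *	fail_make_request=<interval>,<probability>,<space>,<times>
 *
 * e.g. fail_make_request=1,100,0,-1 fails every request on partitions
 * whose make-it-fail attribute is set (see should_fail_request() below).
 */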

static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
{
	return part->make_it_fail && should_fail(&fail_make_request, bytes);
}

static int __init fail_make_request_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
						NULL, &fail_make_request);

	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

static inline bool should_fail_request(struct hd_struct *part,
					unsigned int bytes)
{
	return false;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */

static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
	const int op = bio_op(bio);

	if (part->policy && op_is_write(op)) {
		char b[BDEVNAME_SIZE];

		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			return false;

		WARN_ONCE(1,
		       "generic_make_request: Trying to write "
			"to read-only block-device %s (partno %d)\n",
			bio_devname(bio, b), part->partno);
		/* Older lvm-tools actually trigger this */
		return false;
	}

	return false;
}

static noinline int should_fail_bio(struct bio *bio)
{
	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
		return -EIO;
	return 0;
}
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);

/*
 * Check whether this bio extends beyond the end of the device or partition.
 * This may well happen - the kernel calls bread() without checking the size of
 * the device, e.g., when mounting a file system.
 */
static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
{
	unsigned int nr_sectors = bio_sectors(bio);

	if (nr_sectors && maxsector &&
	    (nr_sectors > maxsector ||
	     bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
		handle_bad_sector(bio, maxsector);
		return -EIO;
	}
	return 0;
}

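/*
 * Worked example (editor's addition): the comparison above is written to
 * avoid overflow in "bi_sector + nr_sectors". With maxsector = 1000 and
 * nr_sectors = 8, a bio starting at sector 992 passes (992 <= 1000 - 8),
 * while one starting at 993 would end past the device and is rejected.
 */
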
/*
 * Remap block n of partition p to block n+start(p) of the disk.
 */
static inline int blk_partition_remap(struct bio *bio)
{
	struct hd_struct *p;
	int ret = -EIO;

	rcu_read_lock();
	p = __disk_get_part(bio->bi_disk, bio->bi_partno);
	if (unlikely(!p))
		goto out;
	if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
		goto out;
	if (unlikely(bio_check_ro(bio, p)))
		goto out;

	/*
	 * Zone reset does not include bi_size so bio_sectors() is always 0.
	 * Include a test for the reset op code and perform the remap if needed.
	 */
	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
		trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
				      bio->bi_iter.bi_sector - p->start_sect);
	}
	bio->bi_partno = 0;
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}
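
/*
 * Worked example (editor's addition): for a bio aimed at sector 100 of a
 * partition whose start_sect is 2048, the code above rewrites bi_sector
 * to 2148 and clears bi_partno, so the bio now addresses the whole disk;
 * the remap tracepoint still reports the original partition-relative
 * sector (2148 - 2048 = 100).
 */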
27a84d54c   Christoph Hellwig   block: refactor g...
821
822
  static noinline_for_stack bool
  generic_make_request_checks(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
823
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
824
  	struct request_queue *q;
5a7bbad27   Christoph Hellwig   block: remove sup...
825
  	int nr_sectors = bio_sectors(bio);
4e4cbee93   Christoph Hellwig   block: switch bio...
826
  	blk_status_t status = BLK_STS_IOERR;
5a7bbad27   Christoph Hellwig   block: remove sup...
827
  	char b[BDEVNAME_SIZE];
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
828
829
  
  	might_sleep();
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
830

74d46992e   Christoph Hellwig   block: replace bi...
831
  	q = bio->bi_disk->queue;
5a7bbad27   Christoph Hellwig   block: remove sup...
832
833
834
835
836
  	if (unlikely(!q)) {
  		printk(KERN_ERR
  		       "generic_make_request: Trying to access "
  			"nonexistent block-device %s (%Lu)
  ",
74d46992e   Christoph Hellwig   block: replace bi...
837
  			bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
5a7bbad27   Christoph Hellwig   block: remove sup...
838
839
  		goto end_io;
  	}
c17bb4951   Akinobu Mita   [PATCH] fault-inj...
840

03a07c92a   Goldwyn Rodrigues   block: return on ...
841
  	/*
2ac95310f   Roman Penyaev   block: end bio wi...
842
843
844
  	 * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
  	 * with BLK_STS_AGAIN status in order to catch -EAGAIN and
  	 * to give a chance to the caller to repeat request gracefully.
03a07c92a   Goldwyn Rodrigues   block: return on ...
845
  	 */
2ac95310f   Roman Penyaev   block: end bio wi...
846
847
848
849
  	if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
  		status = BLK_STS_AGAIN;
  		goto end_io;
  	}
03a07c92a   Goldwyn Rodrigues   block: return on ...
850

30abb3a67   Howard McLauchlan   block: Add should...
851
  	if (should_fail_bio(bio))
5a7bbad27   Christoph Hellwig   block: remove sup...
852
  		goto end_io;
2056a782f   Jens Axboe   [PATCH] Block que...
853

52c5e62d4   Christoph Hellwig   block: bio_check_...
854
855
  	if (bio->bi_partno) {
  		if (unlikely(blk_partition_remap(bio)))
721c7fc70   Ilya Dryomov   block: fail op_is...
856
857
  			goto end_io;
  	} else {
52c5e62d4   Christoph Hellwig   block: bio_check_...
858
859
860
  		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
  			goto end_io;
  		if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
721c7fc70   Ilya Dryomov   block: fail op_is...
861
862
  			goto end_io;
  	}
2056a782f   Jens Axboe   [PATCH] Block que...
863

5a7bbad27   Christoph Hellwig   block: remove sup...
864
865
866
867
868
  	/*
  	 * Filter flush bio's early so that make_request based
  	 * drivers without flush support don't have to worry
  	 * about them.
  	 */
f3a8ab7d5   Jens Axboe   block: cleanup re...
869
  	if (op_is_flush(bio->bi_opf) &&
c888a8f95   Jens Axboe   block: kill off q...
870
  	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1eff9d322   Jens Axboe   block: rename bio...
871
  		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
5a7bbad27   Christoph Hellwig   block: remove sup...
872
  		if (!nr_sectors) {
4e4cbee93   Christoph Hellwig   block: switch bio...
873
  			status = BLK_STS_OK;
51fd77bd9   Jens Axboe   [BLOCK] Don't all...
874
875
  			goto end_io;
  		}
5a7bbad27   Christoph Hellwig   block: remove sup...
876
  	}
5ddfe9691   NeilBrown   [PATCH] md: check...
877

d04c406f2   Christoph Hellwig   block: clear REQ_...
878
879
  	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
  		bio->bi_opf &= ~REQ_HIPRI;
288dab8a3   Christoph Hellwig   block: add a sepa...
880
881
882
883
884
885
886
887
888
889
  	switch (bio_op(bio)) {
  	case REQ_OP_DISCARD:
  		if (!blk_queue_discard(q))
  			goto not_supported;
  		break;
  	case REQ_OP_SECURE_ERASE:
  		if (!blk_queue_secure_erase(q))
  			goto not_supported;
  		break;
  	case REQ_OP_WRITE_SAME:
74d46992e   Christoph Hellwig   block: replace bi...
890
  		if (!q->limits.max_write_same_sectors)
288dab8a3   Christoph Hellwig   block: add a sepa...
891
  			goto not_supported;
58886785d   Nicolai Stange   block: fix uninte...
892
  		break;
2d253440b   Shaun Tancheff   block: Define zon...
893
  	case REQ_OP_ZONE_RESET:
74d46992e   Christoph Hellwig   block: replace bi...
894
  		if (!blk_queue_is_zoned(q))
2d253440b   Shaun Tancheff   block: Define zon...
895
  			goto not_supported;
288dab8a3   Christoph Hellwig   block: add a sepa...
896
  		break;
6e33dbf28   Chaitanya Kulkarni   blk-zoned: implem...
897
898
899
900
  	case REQ_OP_ZONE_RESET_ALL:
  		if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
  			goto not_supported;
  		break;
a6f0788ec   Chaitanya Kulkarni   block: add suppor...
901
  	case REQ_OP_WRITE_ZEROES:
74d46992e   Christoph Hellwig   block: replace bi...
902
  		if (!q->limits.max_write_zeroes_sectors)
a6f0788ec   Chaitanya Kulkarni   block: add suppor...
903
904
  			goto not_supported;
  		break;
288dab8a3   Christoph Hellwig   block: add a sepa...
905
906
  	default:
  		break;
5a7bbad27   Christoph Hellwig   block: remove sup...
907
  	}
01edede41   Minchan Kim   block: trace bio ...
908

7f4b35d15   Tejun Heo   block: allocate i...
909
910
911
912
913
914
915
  	/*
  	 * Various block parts want %current->io_context and lazy ioc
  	 * allocation ends up trading a lot of pain for a small amount of
  	 * memory.  Just allocate it upfront.  This may fail and block
  	 * layer knows how to live with it.
  	 */
  	create_io_context(GFP_ATOMIC, q->node);
ae1188963   Tejun Heo   blkcg: consolidat...
916
917
  	if (!blkcg_bio_issue_check(q, bio))
  		return false;
27a84d54c   Christoph Hellwig   block: refactor g...
918

fbbaf700e   NeilBrown   block: trace comp...
919
920
921
922
923
924
925
  	if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
  		trace_block_bio_queue(q, bio);
  		/* Now that enqueuing has been traced, we need to trace
  		 * completion as well.
  		 */
  		bio_set_flag(bio, BIO_TRACE_COMPLETION);
  	}
27a84d54c   Christoph Hellwig   block: refactor g...
926
  	return true;
a7384677b   Tejun Heo   block: remove dup...
927

288dab8a3   Christoph Hellwig   block: add a sepa...
928
  not_supported:
4e4cbee93   Christoph Hellwig   block: switch bio...
929
  	status = BLK_STS_NOTSUPP;
a7384677b   Tejun Heo   block: remove dup...
930
  end_io:
4e4cbee93   Christoph Hellwig   block: switch bio...
931
  	bio->bi_status = status;
4246a0b63   Christoph Hellwig   block: add a bi_e...
932
  	bio_endio(bio);
27a84d54c   Christoph Hellwig   block: refactor g...
933
  	return false;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
934
  }
27a84d54c   Christoph Hellwig   block: refactor g...
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
  /**
   * generic_make_request - hand a buffer to its device driver for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
   * generic_make_request() is used to make I/O requests of block
   * devices. It is passed a &struct bio, which describes the I/O that needs
   * to be done.
   *
   * generic_make_request() does not return any status.  The
   * success/failure status of the request, along with notification of
   * completion, is delivered asynchronously through the bio->bi_end_io
   * function described (one day) else where.
   *
   * The caller of generic_make_request must make sure that bi_io_vec
   * are set to describe the memory buffer, and that bi_dev and bi_sector are
   * set to describe the device address, and the
   * bi_end_io and optionally bi_private are set to describe how
   * completion notification should be signaled.
   *
   * generic_make_request and the drivers it calls may use bi_next if this
   * bio happens to be merged with someone else, and may resubmit the bio to
   * a lower device by calling into generic_make_request recursively, which
   * means the bio should NOT be touched after the call to ->make_request_fn.
d89d87965   Neil Brown   When stacked bloc...
958
   */
dece16353   Jens Axboe   block: change ->m...
959
  blk_qc_t generic_make_request(struct bio *bio)
d89d87965   Neil Brown   When stacked bloc...
960
  {
f5fe1b519   NeilBrown   blk: Ensure users...
961
962
963
964
965
966
967
968
  	/*
  	 * bio_list_on_stack[0] contains bios submitted by the current
  	 * make_request_fn.
  	 * bio_list_on_stack[1] contains bios that were submitted before
  	 * the current make_request_fn, but that haven't been processed
  	 * yet.
  	 */
  	struct bio_list bio_list_on_stack[2];
dece16353   Jens Axboe   block: change ->m...
969
  	blk_qc_t ret = BLK_QC_T_NONE;
bddd87c7e   Akinobu Mita   blk-core: use BIO...
970

27a84d54c   Christoph Hellwig   block: refactor g...
971
  	if (!generic_make_request_checks(bio))
dece16353   Jens Axboe   block: change ->m...
972
  		goto out;
27a84d54c   Christoph Hellwig   block: refactor g...
973
974
975
976
977
978
979
980
981
982
983
  
  	/*
  	 * We only want one ->make_request_fn to be active at a time, else
  	 * stack usage with stacked devices could be a problem.  So use
  	 * current->bio_list to keep a list of requests submitted by a
  	 * make_request_fn function.  current->bio_list is also used as a
  	 * flag to say if generic_make_request is currently active in this
  	 * task or not.  If it is NULL, then no make_request is active.  If
  	 * it is non-NULL, then a make_request is active, and new requests
  	 * should be added at the tail.
  	 */
bddd87c7e   Akinobu Mita   blk-core: use BIO...
984
  	if (current->bio_list) {
f5fe1b519   NeilBrown   blk: Ensure users...
985
  		bio_list_add(&current->bio_list[0], bio);
dece16353   Jens Axboe   block: change ->m...
986
  		goto out;
d89d87965   Neil Brown   When stacked bloc...
987
  	}
27a84d54c   Christoph Hellwig   block: refactor g...
988

d89d87965   Neil Brown   When stacked bloc...
989
990
991
992
993
  	/* The following loop may be a bit non-obvious, and so deserves some
  	 * explanation.
  	 * Before entering the loop, bio->bi_next is NULL (as all callers
  	 * ensure that) so we have a list with a single bio.
  	 * We pretend that we have just taken it off a longer list, so
bddd87c7e   Akinobu Mita   blk-core: use BIO...
994
995
  	 * we assign bio_list to a pointer to the bio_list_on_stack,
  	 * thus initialising the bio_list of new bios to be
27a84d54c   Christoph Hellwig   block: refactor g...
996
  	 * added.  ->make_request() may indeed add some more bios
d89d87965   Neil Brown   When stacked bloc...
997
998
999
  	 * through a recursive call to generic_make_request.  If it
  	 * did, we find a non-NULL value in bio_list and re-enter the loop
  	 * from the top.  In this case we really did just take the bio
bddd87c7e   Akinobu Mita   blk-core: use BIO...
1000
  	 * off the top of the list (no pretending) and so remove it from
27a84d54c   Christoph Hellwig   block: refactor g...
1001
  	 * bio_list, and call into ->make_request() again.
d89d87965   Neil Brown   When stacked bloc...
1002
1003
  	 */
  	BUG_ON(bio->bi_next);
f5fe1b519   NeilBrown   blk: Ensure users...
1004
1005
  	bio_list_init(&bio_list_on_stack[0]);
  	current->bio_list = bio_list_on_stack;
d89d87965   Neil Brown   When stacked bloc...
1006
  	do {
fe2008640   Ming Lei   block: don't prot...
1007
1008
1009
  		struct request_queue *q = bio->bi_disk->queue;
  		blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
  			BLK_MQ_REQ_NOWAIT : 0;
27a84d54c   Christoph Hellwig   block: refactor g...
1010

fe2008640   Ming Lei   block: don't prot...
1011
  		if (likely(blk_queue_enter(q, flags) == 0)) {
79bd99596   NeilBrown   blk: improve orde...
1012
1013
1014
  			struct bio_list lower, same;
  
  			/* Create a fresh bio_list for all subordinate requests */
f5fe1b519   NeilBrown   blk: Ensure users...
1015
1016
  			bio_list_on_stack[1] = bio_list_on_stack[0];
  			bio_list_init(&bio_list_on_stack[0]);
dece16353   Jens Axboe   block: change ->m...
1017
  			ret = q->make_request_fn(q, bio);
3ef28e83a   Dan Williams   block: generic re...
1018

fe2008640   Ming Lei   block: don't prot...
1019
  			blk_queue_exit(q);
79bd99596   NeilBrown   blk: improve orde...
1020
1021
1022
1023
1024
  			/* sort new bios into those for a lower level
  			 * and those for the same level
  			 */
  			bio_list_init(&lower);
  			bio_list_init(&same);
f5fe1b519   NeilBrown   blk: Ensure users...
1025
  			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
74d46992e   Christoph Hellwig   block: replace bi...
1026
  				if (q == bio->bi_disk->queue)
79bd99596   NeilBrown   blk: improve orde...
1027
1028
1029
1030
  					bio_list_add(&same, bio);
  				else
  					bio_list_add(&lower, bio);
  			/* now assemble so we handle the lowest level first */
f5fe1b519   NeilBrown   blk: Ensure users...
1031
1032
1033
  			bio_list_merge(&bio_list_on_stack[0], &lower);
  			bio_list_merge(&bio_list_on_stack[0], &same);
  			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
3ef28e83a   Dan Williams   block: generic re...
1034
  		} else {
03a07c92a   Goldwyn Rodrigues   block: return on ...
1035
1036
1037
1038
1039
  			if (unlikely(!blk_queue_dying(q) &&
  					(bio->bi_opf & REQ_NOWAIT)))
  				bio_wouldblock_error(bio);
  			else
  				bio_io_error(bio);
3ef28e83a   Dan Williams   block: generic re...
1040
  		}
f5fe1b519   NeilBrown   blk: Ensure users...
1041
  		bio = bio_list_pop(&bio_list_on_stack[0]);
d89d87965   Neil Brown   When stacked bloc...
1042
  	} while (bio);
bddd87c7e   Akinobu Mita   blk-core: use BIO...
1043
  	current->bio_list = NULL; /* deactivate */
dece16353   Jens Axboe   block: change ->m...
1044
1045
1046
  
  out:
  	return ret;
d89d87965   Neil Brown   When stacked bloc...
1047
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1048
1049
1050
  EXPORT_SYMBOL(generic_make_request);
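  
  /*
   * Illustrative sketch, not part of blk-core: a minimal make_request_fn
   * for a hypothetical stacking driver that remaps each bio onto a lower
   * device.  The recursive call into generic_make_request() does not grow
   * the stack; the bio is tail-queued on current->bio_list and handled by
   * the loop above.  struct ex_stack_dev and all ex_* names are assumed.
   */
  struct ex_stack_dev {
  	struct block_device	*lower_bdev;
  	sector_t		start_sector;
  };
  
  static blk_qc_t ex_stack_make_request(struct request_queue *q,
  				      struct bio *bio)
  {
  	struct ex_stack_dev *dev = q->queuedata;
  
  	/* Redirect the bio to the lower device, shifting the sector. */
  	bio_set_dev(bio, dev->lower_bdev);
  	bio->bi_iter.bi_sector += dev->start_sector;
  
  	/* Queued on current->bio_list; no real stack recursion occurs. */
  	return generic_make_request(bio);
  }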
  
  /**
f421e1d9a   Christoph Hellwig   block: provide a ...
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
   * direct_make_request - hand a buffer directly to its device driver for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
   * This function behaves like generic_make_request(), but does not protect
   * against recursion.  Must only be used if the called driver is known
   * not to call generic_make_request (or direct_make_request) again from
   * its make_request function.  (Calling direct_make_request again from
   * a workqueue is perfectly fine as that doesn't recurse).
   */
  blk_qc_t direct_make_request(struct bio *bio)
  {
  	struct request_queue *q = bio->bi_disk->queue;
  	bool nowait = bio->bi_opf & REQ_NOWAIT;
  	blk_qc_t ret;
  
  	if (!generic_make_request_checks(bio))
  		return BLK_QC_T_NONE;
3a0a52997   Bart Van Assche   block, scsi: Make...
1068
  	if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
f421e1d9a   Christoph Hellwig   block: provide a ...
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
  		if (nowait && !blk_queue_dying(q))
  			bio->bi_status = BLK_STS_AGAIN;
  		else
  			bio->bi_status = BLK_STS_IOERR;
  		bio_endio(bio);
  		return BLK_QC_T_NONE;
  	}
  
  	ret = q->make_request_fn(q, bio);
  	blk_queue_exit(q);
  	return ret;
  }
  EXPORT_SYMBOL_GPL(direct_make_request);
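  
  /*
   * Illustrative sketch, not part of blk-core: resubmitting failed-over
   * bios from a workqueue, the pattern direct_make_request() exists for
   * (nvme multipath works this way).  struct ex_mpath_head and all ex_*
   * names are assumed.
   */
  struct ex_mpath_head {
  	struct work_struct	requeue_work;
  	struct bio_list		requeue_list;
  	struct block_device	*current_path;
  };
  
  static void ex_requeue_work_fn(struct work_struct *work)
  {
  	struct ex_mpath_head *head =
  		container_of(work, struct ex_mpath_head, requeue_work);
  	struct bio *bio;
  
  	while ((bio = bio_list_pop(&head->requeue_list)) != NULL) {
  		bio_set_dev(bio, head->current_path);
  		/* Safe: the lower driver does not recurse back into us. */
  		direct_make_request(bio);
  	}
  }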
  
  /**
710027a48   Randy Dunlap   Add some block/ s...
1084
   * submit_bio - submit a bio to the block device layer for I/O
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1085
1086
1087
1088
   * @bio: The &struct bio which describes the I/O
   *
   * submit_bio() is very similar in purpose to generic_make_request(), and
   * uses that function to do most of the work. Both are fairly rough
710027a48   Randy Dunlap   Add some block/ s...
1089
   * interfaces; @bio must be set up in advance and ready for I/O.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1090
1091
   *
   */
4e49ea4a3   Mike Christie   block/fs/drivers:...
1092
  blk_qc_t submit_bio(struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1093
  {
b8e24a930   Johannes Weiner   block: annotate r...
1094
1095
1096
  	bool workingset_read = false;
  	unsigned long pflags;
  	blk_qc_t ret;
d3f77dfdc   Tejun Heo   blkcg: implement ...
1097
1098
  	if (blkcg_punt_bio_submit(bio))
  		return BLK_QC_T_NONE;
bf2de6f5a   Jens Axboe   block: Initial su...
1099
1100
1101
1102
  	/*
  	 * If it's a regular read/write or a barrier with data attached,
  	 * go through the normal accounting stuff before submission.
  	 */
e2a60da74   Martin K. Petersen   block: Clean up s...
1103
  	if (bio_has_data(bio)) {
4363ac7c1   Martin K. Petersen   block: Implement ...
1104
  		unsigned int count;
95fe6c1a2   Mike Christie   block, fs, mm, dr...
1105
  		if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
7c5a0dcf5   Jiufei Xue   block: fix the co...
1106
  			count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
4363ac7c1   Martin K. Petersen   block: Implement ...
1107
1108
  		else
  			count = bio_sectors(bio);
a8ebb056a   Mike Christie   block, drivers, c...
1109
  		if (op_is_write(bio_op(bio))) {
bf2de6f5a   Jens Axboe   block: Initial su...
1110
1111
  			count_vm_events(PGPGOUT, count);
  		} else {
b8e24a930   Johannes Weiner   block: annotate r...
1112
1113
  			if (bio_flagged(bio, BIO_WORKINGSET))
  				workingset_read = true;
4f024f379   Kent Overstreet   block: Abstract o...
1114
  			task_io_account_read(bio->bi_iter.bi_size);
bf2de6f5a   Jens Axboe   block: Initial su...
1115
1116
1117
1118
1119
  			count_vm_events(PGPGIN, count);
  		}
  
  		if (unlikely(block_dump)) {
  			char b[BDEVNAME_SIZE];
8dcbdc742   San Mehat   block: block_dump...
1120
1121
  			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
ba25f9dcc   Pavel Emelyanov   Use helpers to ob...
1122
  			current->comm, task_pid_nr(current),
a8ebb056a   Mike Christie   block, drivers, c...
1123
  				op_is_write(bio_op(bio)) ? "WRITE" : "READ",
4f024f379   Kent Overstreet   block: Abstract o...
1124
  				(unsigned long long)bio->bi_iter.bi_sector,
74d46992e   Christoph Hellwig   block: replace bi...
1125
  				bio_devname(bio, b), count);
bf2de6f5a   Jens Axboe   block: Initial su...
1126
  		}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1127
  	}
b8e24a930   Johannes Weiner   block: annotate r...
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
  	/*
  	 * If we're reading data that is part of the userspace
  	 * workingset, count submission time as memory stall. When the
  	 * device is congested, or the submitting cgroup IO-throttled,
  	 * submission can be a significant part of overall IO time.
  	 */
  	if (workingset_read)
  		psi_memstall_enter(&pflags);
  
  	ret = generic_make_request(bio);
  
  	if (workingset_read)
  		psi_memstall_leave(&pflags);
  
  	return ret;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1143
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1144
  EXPORT_SYMBOL(submit_bio);
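  
  /*
   * Illustrative sketch, not part of blk-core: a synchronous single-page
   * read built on submit_bio(), following the contract documented for
   * generic_make_request() above (device, sector, payload and completion
   * callback all set up before submission).  All ex_* names are assumed.
   */
  static void ex_read_end_io(struct bio *bio)
  {
  	struct completion *done = bio->bi_private;
  
  	if (bio->bi_status)
  		pr_err("example read failed: %d\n",
  		       blk_status_to_errno(bio->bi_status));
  	complete(done);
  	bio_put(bio);
  }
  
  static int ex_read_one_page(struct block_device *bdev, sector_t sector,
  			    struct page *page)
  {
  	DECLARE_COMPLETION_ONSTACK(done);
  	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
  
  	if (!bio)
  		return -ENOMEM;
  
  	bio_set_dev(bio, bdev);
  	bio->bi_iter.bi_sector = sector;
  	bio->bi_opf = REQ_OP_READ;
  	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
  		bio_put(bio);
  		return -EIO;
  	}
  	bio->bi_end_io = ex_read_end_io;
  	bio->bi_private = &done;
  
  	submit_bio(bio);
  	wait_for_completion_io(&done);
  	return 0;
  }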
1052b8ac5   Jens Axboe   blk-mq: when poll...
1145
  /**
bf4e6b4e7   Hannes Reinecke   block: Always che...
1146
1147
   * blk_cloned_rq_check_limits - Helper function to check a cloned request
   *                              for the new queue limits
82124d603   Kiyoshi Ueda   block: add reques...
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
   * @q:  the queue
   * @rq: the request being checked
   *
   * Description:
   *    @rq may have been made based on weaker limitations of upper-level queues
   *    in request stacking drivers, and it may violate the limits of @q.
   *    Since the block layer and the underlying device driver trust @rq
   *    after it is inserted into @q, it should be checked against @q before
   *    the insertion using this generic function.
   *
82124d603   Kiyoshi Ueda   block: add reques...
1158
   *    Request stacking drivers like request-based dm may change the queue
bf4e6b4e7   Hannes Reinecke   block: Always che...
1159
1160
   *    limits when retrying requests on other queues. Those requests need
   *    to be checked against the new queue limits again during dispatch.
82124d603   Kiyoshi Ueda   block: add reques...
1161
   */
bf4e6b4e7   Hannes Reinecke   block: Always che...
1162
1163
  static int blk_cloned_rq_check_limits(struct request_queue *q,
  				      struct request *rq)
82124d603   Kiyoshi Ueda   block: add reques...
1164
  {
8fe0d473f   Mike Christie   block: convert me...
1165
  	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
61939b12d   John Pittman   block: print offe...
1166
1167
1168
1169
  		printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
  			__func__, blk_rq_sectors(rq),
  			blk_queue_get_max_sectors(q, req_op(rq)));
82124d603   Kiyoshi Ueda   block: add reques...
1170
1171
1172
1173
1174
1175
1176
1177
1178
  		return -EIO;
  	}
  
  	/*
  	 * queue's settings related to segment counting like q->bounce_pfn
  	 * may differ from that of other stacking queues.
  	 * Recalculate it to check the request correctly on this queue's
  	 * limitation.
  	 */
e9cd19c0c   Christoph Hellwig   block: simplify b...
1179
  	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
8a78362c4   Martin K. Petersen   block: Consolidat...
1180
  	if (rq->nr_phys_segments > queue_max_segments(q)) {
61939b12d   John Pittman   block: print offe...
1181
1182
1183
  		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
  			__func__, rq->nr_phys_segments, queue_max_segments(q));
82124d603   Kiyoshi Ueda   block: add reques...
1184
1185
1186
1187
1188
  		return -EIO;
  	}
  
  	return 0;
  }
82124d603   Kiyoshi Ueda   block: add reques...
1189
1190
1191
1192
1193
1194
  
  /**
   * blk_insert_cloned_request - Helper for stacking drivers to submit a request
   * @q:  the queue to submit the request
   * @rq: the request being queued
   */
2a842acab   Christoph Hellwig   block: introduce ...
1195
  blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
82124d603   Kiyoshi Ueda   block: add reques...
1196
  {
bf4e6b4e7   Hannes Reinecke   block: Always che...
1197
  	if (blk_cloned_rq_check_limits(q, rq))
2a842acab   Christoph Hellwig   block: introduce ...
1198
  		return BLK_STS_IOERR;
82124d603   Kiyoshi Ueda   block: add reques...
1199

b2c9cd379   Akinobu Mita   fail_make_request...
1200
1201
  	if (rq->rq_disk &&
  	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2a842acab   Christoph Hellwig   block: introduce ...
1202
  		return BLK_STS_IOERR;
82124d603   Kiyoshi Ueda   block: add reques...
1203

a1ce35fa4   Jens Axboe   block: remove dea...
1204
1205
  	if (blk_queue_io_stat(q))
  		blk_account_io_start(rq, true);
82124d603   Kiyoshi Ueda   block: add reques...
1206
1207
  
  	/*
a1ce35fa4   Jens Axboe   block: remove dea...
1208
1209
1210
  	 * Since we have a scheduler attached on the top device,
  	 * bypass a potential scheduler on the bottom device for
  	 * insert.
82124d603   Kiyoshi Ueda   block: add reques...
1211
  	 */
fd9c40f64   Bart Van Assche   block: Revert v5....
1212
  	return blk_mq_request_issue_directly(rq, true);
82124d603   Kiyoshi Ueda   block: add reques...
1213
1214
  }
  EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
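  
  /*
   * Illustrative sketch, not part of blk-core: how a request-based
   * stacking driver (dm-mpath has this shape) might dispatch a prepared
   * clone to the lower queue and classify the result.  ex_dispatch_clone()
   * is assumed.
   */
  static blk_status_t ex_dispatch_clone(struct request_queue *lower_q,
  				      struct request *clone)
  {
  	blk_status_t ret = blk_insert_cloned_request(lower_q, clone);
  
  	switch (ret) {
  	case BLK_STS_OK:
  		break;
  	case BLK_STS_RESOURCE:
  	case BLK_STS_DEV_RESOURCE:
  		/* Lower queue out of resources: caller should retry later. */
  		break;
  	default:
  		/* Hard error: caller propagates it to the original request. */
  		break;
  	}
  	return ret;
  }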
80a761fd3   Tejun Heo   block: implement ...
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
  /**
   * blk_rq_err_bytes - determine number of bytes till the next failure boundary
   * @rq: request to examine
   *
   * Description:
   *     A request could be a merge of IOs which require different failure
   *     handling.  This function determines the number of bytes which
   *     can be failed from the beginning of the request without
   *     crossing into an area which needs to be retried further.
   *
   * Return:
   *     The number of bytes to fail.
80a761fd3   Tejun Heo   block: implement ...
1227
1228
1229
1230
1231
1232
   */
  unsigned int blk_rq_err_bytes(const struct request *rq)
  {
  	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
  	unsigned int bytes = 0;
  	struct bio *bio;
e80640213   Christoph Hellwig   block: split out ...
1233
  	if (!(rq->rq_flags & RQF_MIXED_MERGE))
80a761fd3   Tejun Heo   block: implement ...
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
  		return blk_rq_bytes(rq);
  
  	/*
  	 * Currently the only 'mixing' which can happen is between
  	 * different fastfail types.  We can safely fail portions
  	 * which have all the failfast bits that the first one has -
  	 * the ones which are at least as eager to fail as the first
  	 * one.
  	 */
  	for (bio = rq->bio; bio; bio = bio->bi_next) {
1eff9d322   Jens Axboe   block: rename bio...
1244
  		if ((bio->bi_opf & ff) != ff)
80a761fd3   Tejun Heo   block: implement ...
1245
  			break;
4f024f379   Kent Overstreet   block: Abstract o...
1246
  		bytes += bio->bi_iter.bi_size;
80a761fd3   Tejun Heo   block: implement ...
1247
1248
1249
1250
1251
1252
1253
  	}
  
  	/* this could lead to infinite loop */
  	BUG_ON(blk_rq_bytes(rq) && !bytes);
  	return bytes;
  }
  EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
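  
  /*
   * Illustrative sketch, not part of blk-core: failing only the failfast
   * prefix of a mixed-merge request, the way the old __blk_end_request_err()
   * helper used blk_rq_err_bytes().  ex_fail_failfast_part() is assumed.
   */
  static bool ex_fail_failfast_part(struct request *rq, blk_status_t error)
  {
  	/*
  	 * Only the leading bios that share the first bio's failfast bits
  	 * may be failed; blk_update_request() returns true if the rest of
  	 * the request still needs to be retried.
  	 */
  	return blk_update_request(rq, error, blk_rq_err_bytes(rq));
  }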
320ae51fe   Jens Axboe   blk-mq: new multi...
1254
  void blk_account_io_completion(struct request *req, unsigned int bytes)
bc58ba946   Jens Axboe   block: add sysfs ...
1255
  {
c2553b584   Jens Axboe   block: make blk_d...
1256
  	if (blk_do_io_stat(req)) {
ddcf35d39   Michael Callahan   block: Add and us...
1257
  		const int sgrp = op_stat_group(req_op(req));
bc58ba946   Jens Axboe   block: add sysfs ...
1258
  		struct hd_struct *part;
bc58ba946   Jens Axboe   block: add sysfs ...
1259

112f158f6   Mike Snitzer   block: stop passi...
1260
  		part_stat_lock();
09e099d4b   Jerome Marchand   block: fix accoun...
1261
  		part = req->part;
112f158f6   Mike Snitzer   block: stop passi...
1262
  		part_stat_add(part, sectors[sgrp], bytes >> 9);
bc58ba946   Jens Axboe   block: add sysfs ...
1263
1264
1265
  		part_stat_unlock();
  	}
  }
522a77756   Omar Sandoval   block: consolidat...
1266
  void blk_account_io_done(struct request *req, u64 now)
bc58ba946   Jens Axboe   block: add sysfs ...
1267
  {
bc58ba946   Jens Axboe   block: add sysfs ...
1268
  	/*
dd4c133f3   Tejun Heo   block: rename bar...
1269
1270
1271
  	 * Account IO completion.  flush_rq isn't accounted as a
  	 * normal IO on either queueing or completion.  Accounting the
  	 * containing request is enough.
bc58ba946   Jens Axboe   block: add sysfs ...
1272
  	 */
e80640213   Christoph Hellwig   block: split out ...
1273
  	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
ddcf35d39   Michael Callahan   block: Add and us...
1274
  		const int sgrp = op_stat_group(req_op(req));
bc58ba946   Jens Axboe   block: add sysfs ...
1275
  		struct hd_struct *part;
bc58ba946   Jens Axboe   block: add sysfs ...
1276

112f158f6   Mike Snitzer   block: stop passi...
1277
  		part_stat_lock();
09e099d4b   Jerome Marchand   block: fix accoun...
1278
  		part = req->part;
bc58ba946   Jens Axboe   block: add sysfs ...
1279

5b18b5a73   Mikulas Patocka   block: delete par...
1280
  		update_io_ticks(part, jiffies);
112f158f6   Mike Snitzer   block: stop passi...
1281
1282
  		part_stat_inc(part, ios[sgrp]);
  		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
5b18b5a73   Mikulas Patocka   block: delete par...
1283
  		part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
ddcf35d39   Michael Callahan   block: Add and us...
1284
  		part_dec_in_flight(req->q, part, rq_data_dir(req));
bc58ba946   Jens Axboe   block: add sysfs ...
1285

6c23a9681   Jens Axboe   block: add intern...
1286
  		hd_struct_put(part);
bc58ba946   Jens Axboe   block: add sysfs ...
1287
1288
1289
  		part_stat_unlock();
  	}
  }
320ae51fe   Jens Axboe   blk-mq: new multi...
1290
1291
1292
1293
  void blk_account_io_start(struct request *rq, bool new_io)
  {
  	struct hd_struct *part;
  	int rw = rq_data_dir(rq);
320ae51fe   Jens Axboe   blk-mq: new multi...
1294
1295
1296
  
  	if (!blk_do_io_stat(rq))
  		return;
112f158f6   Mike Snitzer   block: stop passi...
1297
  	part_stat_lock();
320ae51fe   Jens Axboe   blk-mq: new multi...
1298
1299
1300
  
  	if (!new_io) {
  		part = rq->part;
112f158f6   Mike Snitzer   block: stop passi...
1301
  		part_stat_inc(part, merges[rw]);
320ae51fe   Jens Axboe   blk-mq: new multi...
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
  	} else {
  		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
  		if (!hd_struct_try_get(part)) {
  			/*
  			 * The partition is already being removed,
  			 * the request will be accounted on the disk only
  			 *
  			 * We take a reference on disk->part0 although that
  			 * partition will never be deleted, so we can treat
  			 * it as any other partition.
  			 */
  			part = &rq->rq_disk->part0;
  			hd_struct_get(part);
  		}
d62e26b3f   Jens Axboe   block: pass in qu...
1316
  		part_inc_in_flight(rq->q, part, rw);
320ae51fe   Jens Axboe   blk-mq: new multi...
1317
1318
  		rq->part = part;
  	}
5b18b5a73   Mikulas Patocka   block: delete par...
1319
  	update_io_ticks(part, jiffies);
320ae51fe   Jens Axboe   blk-mq: new multi...
1320
1321
  	part_stat_unlock();
  }
ef71de8b1   Christoph Hellwig   block: add a blk_...
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
  /*
   * Steal bios from a request and add them to a bio list.
   * The request must not have been partially completed before.
   */
  void blk_steal_bios(struct bio_list *list, struct request *rq)
  {
  	if (rq->bio) {
  		if (list->tail)
  			list->tail->bi_next = rq->bio;
  		else
  			list->head = rq->bio;
  		list->tail = rq->biotail;
  
  		rq->bio = NULL;
  		rq->biotail = NULL;
  	}
  
  	rq->__data_len = 0;
  }
  EXPORT_SYMBOL_GPL(blk_steal_bios);
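  
  /*
   * Illustrative sketch, not part of blk-core: the failover pattern
   * blk_steal_bios() serves (nvme multipath uses it): detach the bios from
   * a failed request, complete the request itself, and resubmit the bios
   * on another path later.  ex_failover_rq() is assumed.
   */
  static void ex_failover_rq(struct request *rq, struct bio_list *requeue)
  {
  	blk_steal_bios(requeue, rq);
  	blk_mq_end_request(rq, BLK_STS_OK);
  	/* bios on @requeue are resubmitted later, e.g. from a workqueue */
  }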
9934c8c04   Tejun Heo   block: implement ...
1342
  /**
2e60e0229   Tejun Heo   block: clean up r...
1343
   * blk_update_request - Special helper function for request stacking drivers
8ebf97560   Randy Dunlap   block: fix kernel...
1344
   * @req:      the request being processed
2a842acab   Christoph Hellwig   block: introduce ...
1345
   * @error:    block status code
8ebf97560   Randy Dunlap   block: fix kernel...
1346
   * @nr_bytes: number of bytes to complete @req
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1347
1348
   *
   * Description:
8ebf97560   Randy Dunlap   block: fix kernel...
1349
1350
1351
   *     Ends I/O on a number of bytes attached to @req, but doesn't complete
   *     the request structure even if @req has no bytes left over.
   *     If @req has leftover bytes, it is set up for the next range of segments.
2e60e0229   Tejun Heo   block: clean up r...
1352
1353
1354
   *
   *     This special helper function is only for request stacking drivers
   *     (e.g. request-based dm) so that they can handle partial completion.
3a211b715   Pavel Begunkov   blk-core: Remove ...
1355
   *     Actual device drivers should use blk_mq_end_request instead.
2e60e0229   Tejun Heo   block: clean up r...
1356
1357
1358
   *
   *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
   *     %false return from this function.
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1359
   *
1954e9a99   Bart Van Assche   block: Document h...
1360
1361
1362
1363
   * Note:
   *	The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in both
   *	blk_rq_bytes() and in blk_update_request().
   *
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1364
   * Return:
2e60e0229   Tejun Heo   block: clean up r...
1365
1366
   *     %false - this request doesn't have any more data
   *     %true  - this request has more data
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1367
   **/
2a842acab   Christoph Hellwig   block: introduce ...
1368
1369
  bool blk_update_request(struct request *req, blk_status_t error,
  		unsigned int nr_bytes)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1370
  {
f79ea4161   Kent Overstreet   block: Refactor b...
1371
  	int total_bytes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1372

2a842acab   Christoph Hellwig   block: introduce ...
1373
  	trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
4a0efdc93   Hannes Reinecke   block: misplaced ...
1374

2e60e0229   Tejun Heo   block: clean up r...
1375
1376
  	if (!req->bio)
  		return false;
54d4e6ab9   Max Gurtovoy   block: centralize...
1377
1378
1379
1380
1381
  #ifdef CONFIG_BLK_DEV_INTEGRITY
  	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
  	    error == BLK_STS_OK)
  		req->q->integrity.profile->complete_fn(req, nr_bytes);
  #endif
2a842acab   Christoph Hellwig   block: introduce ...
1382
1383
  	if (unlikely(error && !blk_rq_is_passthrough(req) &&
  		     !(req->rq_flags & RQF_QUIET)))
178cc590e   Christoph Hellwig   block: improve pr...
1384
  		print_req_error(req, error, __func__);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1385

bc58ba946   Jens Axboe   block: add sysfs ...
1386
  	blk_account_io_completion(req, nr_bytes);
d72d904a5   Jens Axboe   [BLOCK] Update re...
1387

f79ea4161   Kent Overstreet   block: Refactor b...
1388
1389
1390
  	total_bytes = 0;
  	while (req->bio) {
  		struct bio *bio = req->bio;
4f024f379   Kent Overstreet   block: Abstract o...
1391
  		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1392

9c24c10a2   Bart Van Assche   Revert "block: Ad...
1393
  		if (bio_bytes == bio->bi_iter.bi_size)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1394
  			req->bio = bio->bi_next;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1395

fbbaf700e   NeilBrown   block: trace comp...
1396
1397
  		/* Completion has already been traced */
  		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
f79ea4161   Kent Overstreet   block: Refactor b...
1398
  		req_bio_endio(req, bio, bio_bytes, error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1399

f79ea4161   Kent Overstreet   block: Refactor b...
1400
1401
  		total_bytes += bio_bytes;
  		nr_bytes -= bio_bytes;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1402

f79ea4161   Kent Overstreet   block: Refactor b...
1403
1404
  		if (!nr_bytes)
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1405
1406
1407
1408
1409
  	}
  
  	/*
  	 * completely done
  	 */
2e60e0229   Tejun Heo   block: clean up r...
1410
1411
1412
1413
1414
1415
  	if (!req->bio) {
  		/*
  		 * Reset counters so that the request stacking driver
  		 * can find how many bytes remain in the request
  		 * later.
  		 */
a2dec7b36   Tejun Heo   block: hide reque...
1416
  		req->__data_len = 0;
2e60e0229   Tejun Heo   block: clean up r...
1417
1418
  		return false;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1419

a2dec7b36   Tejun Heo   block: hide reque...
1420
  	req->__data_len -= total_bytes;
2e46e8b27   Tejun Heo   block: drop reque...
1421
1422
  
  	/* update sector only for requests with clear definition of sector */
57292b58d   Christoph Hellwig   block: introduce ...
1423
  	if (!blk_rq_is_passthrough(req))
a2dec7b36   Tejun Heo   block: hide reque...
1424
  		req->__sector += total_bytes >> 9;
2e46e8b27   Tejun Heo   block: drop reque...
1425

80a761fd3   Tejun Heo   block: implement ...
1426
  	/* mixed attributes always follow the first bio */
e80640213   Christoph Hellwig   block: split out ...
1427
  	if (req->rq_flags & RQF_MIXED_MERGE) {
80a761fd3   Tejun Heo   block: implement ...
1428
  		req->cmd_flags &= ~REQ_FAILFAST_MASK;
1eff9d322   Jens Axboe   block: rename bio...
1429
  		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
80a761fd3   Tejun Heo   block: implement ...
1430
  	}
ed6565e73   Christoph Hellwig   block: handle par...
1431
1432
1433
1434
1435
1436
1437
1438
1439
  	if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
  		/*
  		 * If total number of sectors is less than the first segment
  		 * size, something has gone terribly wrong.
  		 */
  		if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
  			blk_dump_rq_flags(req, "request botched");
  			req->__data_len = blk_rq_cur_bytes(req);
  		}
2e46e8b27   Tejun Heo   block: drop reque...
1440

ed6565e73   Christoph Hellwig   block: handle par...
1441
  		/* recalculate the number of segments */
e9cd19c0c   Christoph Hellwig   block: simplify b...
1442
  		req->nr_phys_segments = blk_recalc_rq_segments(req);
ed6565e73   Christoph Hellwig   block: handle par...
1443
  	}
2e46e8b27   Tejun Heo   block: drop reque...
1444

2e60e0229   Tejun Heo   block: clean up r...
1445
  	return true;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1446
  }
2e60e0229   Tejun Heo   block: clean up r...
1447
  EXPORT_SYMBOL_GPL(blk_update_request);
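  
  /*
   * Illustrative sketch, not part of blk-core: partial completion in a
   * request stacking driver.  Each time the clone completes @nr_bytes,
   * the original request is advanced by the same amount and only fully
   * ended once nothing is left.  ex_complete_original() is assumed.
   */
  static void ex_complete_original(struct request *orig, blk_status_t error,
  				 unsigned int nr_bytes)
  {
  	if (blk_update_request(orig, error, nr_bytes))
  		return;	/* more segments remain in flight */
  
  	blk_mq_end_request(orig, error);
  }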
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1448

2d4dc890b   Ilya Loginov   block: add helper...
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
  #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
  /**
   * rq_flush_dcache_pages - Helper function to flush all pages in a request
   * @rq: the request to be flushed
   *
   * Description:
   *     Flush all pages in @rq.
   */
  void rq_flush_dcache_pages(struct request *rq)
  {
  	struct req_iterator iter;
7988613b0   Kent Overstreet   block: Convert bi...
1460
  	struct bio_vec bvec;
2d4dc890b   Ilya Loginov   block: add helper...
1461
1462
  
  	rq_for_each_segment(bvec, rq, iter)
7988613b0   Kent Overstreet   block: Convert bi...
1463
  		flush_dcache_page(bvec.bv_page);
2d4dc890b   Ilya Loginov   block: add helper...
1464
1465
1466
  }
  EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
  #endif
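  
  /*
   * Illustrative sketch, not part of blk-core: a PIO-style driver that
   * fills a request's pages by CPU copy must flush D-cache aliases before
   * completing the request; on architectures that don't need it, the
   * header provides a no-op fallback.  ex_pio_complete() is assumed.
   */
  static void ex_pio_complete(struct request *rq)
  {
  	rq_flush_dcache_pages(rq);
  	blk_mq_end_request(rq, BLK_STS_OK);
  }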
ef9e3facd   Kiyoshi Ueda   block: add lld bu...
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
  /**
   * blk_lld_busy - Check if underlying low-level drivers of a device are busy
   * @q : the queue of the device being checked
   *
   * Description:
   *    Check if underlying low-level drivers of a device are busy.
   *    If the drivers want to export their busy state, they must implement
   *    the ->busy() callback in their blk_mq_ops first.
   *
   *    Basically, this function is used only by request stacking drivers
   *    to stop dispatching requests to underlying devices when underlying
   *    devices are busy.  This behavior helps more I/O merging on the queue
   *    of the request stacking driver and prevents I/O throughput regression
   *    on burst I/O load.
   *
   * Return:
   *    0 - Not busy (The request stacking driver should dispatch request)
   *    1 - Busy (The request stacking driver should stop dispatching request)
   */
  int blk_lld_busy(struct request_queue *q)
  {
344e9ffcb   Jens Axboe   block: add queue_...
1488
  	if (queue_is_mq(q) && q->mq_ops->busy)
9ba20527f   Jens Axboe   blk-mq: provide m...
1489
  		return q->mq_ops->busy(q);
ef9e3facd   Kiyoshi Ueda   block: add lld bu...
1490
1491
1492
1493
  
  	return 0;
  }
  EXPORT_SYMBOL_GPL(blk_lld_busy);
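  
  /*
   * Illustrative sketch, not part of blk-core: exporting a busy state via
   * the ->busy() callback in blk_mq_ops, which blk_lld_busy() consults
   * (SCSI provides one for request-based dm).  struct ex_dev and the
   * threshold heuristic are assumed.
   */
  struct ex_dev {
  	atomic_t	in_flight;
  	int		busy_threshold;
  };
  
  static bool ex_lld_busy(struct request_queue *q)
  {
  	struct ex_dev *dev = q->queuedata;
  
  	return atomic_read(&dev->in_flight) >= dev->busy_threshold;
  }
  /* wired up as  .busy = ex_lld_busy  in the driver's blk_mq_ops */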
78d8e58a0   Mike Snitzer   Revert "block, dm...
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
  /**
   * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
   * @rq: the clone request to be cleaned up
   *
   * Description:
   *     Free all bios in @rq for a cloned request.
   */
  void blk_rq_unprep_clone(struct request *rq)
  {
  	struct bio *bio;
  
  	while ((bio = rq->bio) != NULL) {
  		rq->bio = bio->bi_next;
  
  		bio_put(bio);
  	}
  }
  EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
  
  /*
   * Copy attributes of the original request to the clone request.
   * The actual data parts (e.g. ->cmd, ->sense) are not copied.
   */
  static void __blk_rq_prep_clone(struct request *dst, struct request *src)
b0fd271d5   Kiyoshi Ueda   block: add reques...
1518
  {
b0fd271d5   Kiyoshi Ueda   block: add reques...
1519
1520
  	dst->__sector = blk_rq_pos(src);
  	dst->__data_len = blk_rq_bytes(src);
297ba57dc   Bart Van Assche   block: Fix clonin...
1521
1522
1523
1524
  	if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
  		dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
  		dst->special_vec = src->special_vec;
  	}
b0fd271d5   Kiyoshi Ueda   block: add reques...
1525
1526
1527
  	dst->nr_phys_segments = src->nr_phys_segments;
  	dst->ioprio = src->ioprio;
  	dst->extra_len = src->extra_len;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
  }
  
  /**
   * blk_rq_prep_clone - Helper function to setup clone request
   * @rq: the request to be setup
   * @rq_src: original request to be cloned
   * @bs: bio_set that bios for clone are allocated from
   * @gfp_mask: memory allocation mask for bio
   * @bio_ctr: setup function to be called for each clone bio.
   *           Returns %0 for success, non %0 for failure.
   * @data: private data to be passed to @bio_ctr
   *
   * Description:
   *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
   *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
   *     are not copied, and copying such parts is the caller's responsibility.
   *     Also, pages which the original bios are pointing to are not copied
   *     and the cloned bios just point to the same pages.
   *     So cloned bios must be completed before original bios, which means
   *     the caller must complete @rq before @rq_src.
   */
  int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
  		      struct bio_set *bs, gfp_t gfp_mask,
  		      int (*bio_ctr)(struct bio *, struct bio *, void *),
  		      void *data)
  {
  	struct bio *bio, *bio_src;
  
  	if (!bs)
f4f8154a0   Kent Overstreet   block: Use bioset...
1557
  		bs = &fs_bio_set;
78d8e58a0   Mike Snitzer   Revert "block, dm...
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
  
  	__rq_for_each_bio(bio_src, rq_src) {
  		bio = bio_clone_fast(bio_src, gfp_mask, bs);
  		if (!bio)
  			goto free_and_out;
  
  		if (bio_ctr && bio_ctr(bio, bio_src, data))
  			goto free_and_out;
  
  		if (rq->bio) {
  			rq->biotail->bi_next = bio;
  			rq->biotail = bio;
  		} else
  			rq->bio = rq->biotail = bio;
  	}
  
  	__blk_rq_prep_clone(rq, rq_src);
  
  	return 0;
  
  free_and_out:
  	if (bio)
  		bio_put(bio);
  	blk_rq_unprep_clone(rq);
  
  	return -ENOMEM;
b0fd271d5   Kiyoshi Ueda   block: add reques...
1584
1585
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
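  
  /*
   * Illustrative sketch, not part of blk-core: preparing a clone the way
   * request-based dm does, with a bio_ctr callback that ties each cloned
   * bio back to per-request context.  struct ex_clone_info, ex_bioset and
   * the other ex_* names are assumed.
   */
  struct ex_clone_info {
  	struct request *orig;
  };
  
  static int ex_clone_bio_ctr(struct bio *bio, struct bio *bio_src,
  			    void *data)
  {
  	struct ex_clone_info *info = data;
  
  	bio->bi_private = info->orig;	/* find the original on completion */
  	return 0;
  }
  
  static int ex_setup_clone(struct request *clone, struct request *rq,
  			  struct bio_set *ex_bioset,
  			  struct ex_clone_info *info)
  {
  	int ret = blk_rq_prep_clone(clone, rq, ex_bioset, GFP_ATOMIC,
  				    ex_clone_bio_ctr, info);
  
  	if (ret)
  		return ret;
  	clone->end_io_data = info;
  	return 0;
  }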
59c3d45e4   Jens Axboe   block: remove 'q'...
1586
  int kblockd_schedule_work(struct work_struct *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1587
1588
1589
  {
  	return queue_work(kblockd_workqueue, work);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1590
  EXPORT_SYMBOL(kblockd_schedule_work);
ee63cfa7f   Jens Axboe   block: add kblock...
1591
1592
1593
1594
1595
  int kblockd_schedule_work_on(int cpu, struct work_struct *work)
  {
  	return queue_work_on(cpu, kblockd_workqueue, work);
  }
  EXPORT_SYMBOL(kblockd_schedule_work_on);
818cd1cba   Jens Axboe   block: add kblock...
1596
1597
1598
1599
1600
1601
  int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
  				unsigned long delay)
  {
  	return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
  }
  EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
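  
  /*
   * Illustrative sketch, not part of blk-core: deferring completion work
   * to kblockd, the block layer's WQ_MEM_RECLAIM/WQ_HIGHPRI workqueue,
   * instead of a driver-private workqueue.  ex_* names are assumed.
   */
  static void ex_defer_fn(struct work_struct *work)
  {
  	/* runs in process context with kblockd's reclaim guarantees */
  }
  
  static DECLARE_WORK(ex_defer_work, ex_defer_fn);
  
  static void ex_defer(void)
  {
  	kblockd_schedule_work(&ex_defer_work);
  }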
75df71362   Suresh Jayaraman   block: document b...
1602
1603
1604
1605
1606
  /**
   * blk_start_plug - initialize blk_plug and track it inside the task_struct
   * @plug:	The &struct blk_plug that needs to be initialized
   *
   * Description:
40405851a   Jeff Moyer   block: clarify do...
1607
1608
1609
1610
1611
1612
1613
1614
1615
   *   blk_start_plug() indicates to the block layer an intent by the caller
   *   to submit multiple I/O requests in a batch.  The block layer may use
   *   this hint to defer submitting I/Os from the caller until blk_finish_plug()
   *   is called.  However, the block layer may choose to submit requests
   *   before a call to blk_finish_plug() if the number of queued I/Os
   *   exceeds %BLK_MAX_REQUEST_COUNT, or if the size of the I/O is larger than
   *   %BLK_PLUG_FLUSH_SIZE.  The queued I/Os may also be submitted early if
   *   the task schedules (see below).
   *
75df71362   Suresh Jayaraman   block: document b...
1616
1617
1618
1619
1620
1621
1622
1623
1624
   *   Tracking blk_plug inside the task_struct will help with auto-flushing the
   *   pending I/O should the task end up blocking between blk_start_plug() and
   *   blk_finish_plug(). This is important from a performance perspective, but
   *   also ensures that we don't deadlock. For instance, if the task is blocking
   *   for a memory allocation, memory reclaim could end up wanting to free a
   *   page belonging to that request that is currently residing in our private
   *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
   *   this kind of deadlock.
   */
73c101011   Jens Axboe   block: initial pa...
1625
1626
1627
  void blk_start_plug(struct blk_plug *plug)
  {
  	struct task_struct *tsk = current;
dd6cf3e18   Shaohua Li   blk: clean up plug
1628
1629
1630
1631
1632
  	/*
  	 * If this is a nested plug, don't actually assign it.
  	 */
  	if (tsk->plug)
  		return;
320ae51fe   Jens Axboe   blk-mq: new multi...
1633
  	INIT_LIST_HEAD(&plug->mq_list);
048c9374a   NeilBrown   block: Enhance ne...
1634
  	INIT_LIST_HEAD(&plug->cb_list);
5f0ed774e   Jens Axboe   block: sum reques...
1635
  	plug->rq_count = 0;
ce5b009cf   Jens Axboe   block: improve lo...
1636
  	plug->multiple_queues = false;
5f0ed774e   Jens Axboe   block: sum reques...
1637

73c101011   Jens Axboe   block: initial pa...
1638
  	/*
dd6cf3e18   Shaohua Li   blk: clean up plug
1639
1640
  	 * Store ordering should not be needed here, since a potential
  	 * preempt will imply a full memory barrier
73c101011   Jens Axboe   block: initial pa...
1641
  	 */
dd6cf3e18   Shaohua Li   blk: clean up plug
1642
  	tsk->plug = plug;
73c101011   Jens Axboe   block: initial pa...
1643
1644
  }
  EXPORT_SYMBOL(blk_start_plug);
74018dc30   NeilBrown   blk: pass from_sc...
1645
  static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
048c9374a   NeilBrown   block: Enhance ne...
1646
1647
  {
  	LIST_HEAD(callbacks);
2a7d5559b   Shaohua Li   block: stack unplug
1648
1649
  	while (!list_empty(&plug->cb_list)) {
  		list_splice_init(&plug->cb_list, &callbacks);
048c9374a   NeilBrown   block: Enhance ne...
1650

2a7d5559b   Shaohua Li   block: stack unplug
1651
1652
  		while (!list_empty(&callbacks)) {
  			struct blk_plug_cb *cb = list_first_entry(&callbacks,
048c9374a   NeilBrown   block: Enhance ne...
1653
1654
  							  struct blk_plug_cb,
  							  list);
2a7d5559b   Shaohua Li   block: stack unplug
1655
  			list_del(&cb->list);
74018dc30   NeilBrown   blk: pass from_sc...
1656
  			cb->callback(cb, from_schedule);
2a7d5559b   Shaohua Li   block: stack unplug
1657
  		}
048c9374a   NeilBrown   block: Enhance ne...
1658
1659
  	}
  }
9cbb17508   NeilBrown   blk: centralize n...
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
  struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
  				      int size)
  {
  	struct blk_plug *plug = current->plug;
  	struct blk_plug_cb *cb;
  
  	if (!plug)
  		return NULL;
  
  	list_for_each_entry(cb, &plug->cb_list, list)
  		if (cb->callback == unplug && cb->data == data)
  			return cb;
  
  	/* Not currently on the callback list */
  	BUG_ON(size < sizeof(*cb));
  	cb = kzalloc(size, GFP_ATOMIC);
  	if (cb) {
  		cb->data = data;
  		cb->callback = unplug;
  		list_add(&cb->list, &plug->cb_list);
  	}
  	return cb;
  }
  EXPORT_SYMBOL(blk_check_plugged);
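  
  /*
   * Illustrative sketch, not part of blk-core: the md/raid pattern for
   * per-plug callbacks.  The driver embeds struct blk_plug_cb in its own
   * structure, lets blk_check_plugged() allocate it once per plug, and
   * frees it in the callback.  struct ex_plug_cb and the ex_* names are
   * assumed.
   */
  struct ex_plug_cb {
  	struct blk_plug_cb	cb;	/* must be first */
  	struct bio_list		pending;
  };
  
  static void ex_unplug(struct blk_plug_cb *cb, bool from_schedule)
  {
  	struct ex_plug_cb *ecb = container_of(cb, struct ex_plug_cb, cb);
  	struct bio *bio;
  
  	while ((bio = bio_list_pop(&ecb->pending)) != NULL)
  		generic_make_request(bio);
  	kfree(ecb);
  }
  
  static void ex_queue_bio_plugged(struct bio *bio)
  {
  	struct blk_plug_cb *cb = blk_check_plugged(ex_unplug, NULL,
  						   sizeof(struct ex_plug_cb));
  
  	if (cb)
  		bio_list_add(&container_of(cb, struct ex_plug_cb, cb)->pending,
  			     bio);
  	else
  		generic_make_request(bio);	/* no plug active */
  }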
49cac01e1   Jens Axboe   block: make unplu...
1684
  void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
73c101011   Jens Axboe   block: initial pa...
1685
  {
74018dc30   NeilBrown   blk: pass from_sc...
1686
  	flush_plug_callbacks(plug, from_schedule);
320ae51fe   Jens Axboe   blk-mq: new multi...
1687
1688
1689
  
  	if (!list_empty(&plug->mq_list))
  		blk_mq_flush_plug_list(plug, from_schedule);
73c101011   Jens Axboe   block: initial pa...
1690
  }
73c101011   Jens Axboe   block: initial pa...
1691

40405851a   Jeff Moyer   block: clarify do...
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
  /**
   * blk_finish_plug - mark the end of a batch of submitted I/O
   * @plug:	The &struct blk_plug passed to blk_start_plug()
   *
   * Description:
   * Indicate that a batch of I/O submissions is complete.  This function
   * must be paired with an initial call to blk_start_plug().  The intent
   * is to allow the block layer to optimize I/O submission.  See the
   * documentation for blk_start_plug() for more information.
   */
73c101011   Jens Axboe   block: initial pa...
1702
1703
  void blk_finish_plug(struct blk_plug *plug)
  {
dd6cf3e18   Shaohua Li   blk: clean up plug
1704
1705
  	if (plug != current->plug)
  		return;
f6603783f   Jens Axboe   block: only force...
1706
  	blk_flush_plug_list(plug, false);
73c101011   Jens Axboe   block: initial pa...
1707

dd6cf3e18   Shaohua Li   blk: clean up plug
1708
  	current->plug = NULL;
73c101011   Jens Axboe   block: initial pa...
1709
  }
88b996cd0   Christoph Hellwig   block: cleanup th...
1710
  EXPORT_SYMBOL(blk_finish_plug);
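  
  /*
   * Illustrative sketch, not part of blk-core: batching several
   * submissions under one plug, as described for blk_start_plug() above.
   * ex_submit_batch() and its arguments are assumed.
   */
  static void ex_submit_batch(struct bio **bios, int nr)
  {
  	struct blk_plug plug;
  	int i;
  
  	blk_start_plug(&plug);
  	for (i = 0; i < nr; i++)
  		submit_bio(bios[i]);
  	blk_finish_plug(&plug);	/* flushes anything still queued */
  }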
73c101011   Jens Axboe   block: initial pa...
1711

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1712
1713
  int __init blk_dev_init(void)
  {
ef295ecf0   Christoph Hellwig   block: better op ...
1714
1715
  	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
0762b23d2   Maninder Singh   block: use FIELD_...
1716
  			FIELD_SIZEOF(struct request, cmd_flags));
ef295ecf0   Christoph Hellwig   block: better op ...
1717
1718
  	BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
  			FIELD_SIZEOF(struct bio, bi_opf));
9eb55b030   Nikanth Karthikesan   block: catch tryi...
1719

89b90be2d   Tejun Heo   block: make kbloc...
1720
1721
  	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
  	kblockd_workqueue = alloc_workqueue("kblockd",
28747fcd2   Matias Bjørling   block: remove WQ_...
1722
  					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1723
1724
1725
  	if (!kblockd_workqueue)
  		panic("Failed to create kblockd
  ");
c2789bd40   Ilya Dryomov   block: rename req...
1726
  	blk_requestq_cachep = kmem_cache_create("request_queue",
165125e1e   Jens Axboe   [BLOCK] Get rid o...
1727
  			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1728

18fbda91c   Omar Sandoval   block: use same b...
1729
1730
1731
  #ifdef CONFIG_DEBUG_FS
  	blk_debugfs_root = debugfs_create_dir("block", NULL);
  #endif
d38ecf935   Jens Axboe   io context sharin...
1732
  	return 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1733
  }