Blame view

block/blk-core.c 52.9 KB
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
2
3
4
5
   * Copyright (C) 1991, 1992 Linus Torvalds
   * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
   * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
   * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
6728cb0e6   Jens Axboe   block: make core ...
6
7
   * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
   *	-  July2000
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
8
9
10
11
12
13
   * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
   */
  
  /*
   * This handles all read/write requests to block devices
   */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
14
15
16
17
18
19
20
21
22
23
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/backing-dev.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/highmem.h>
  #include <linux/mm.h>
  #include <linux/kernel_stat.h>
  #include <linux/string.h>
  #include <linux/init.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
24
25
26
27
  #include <linux/completion.h>
  #include <linux/slab.h>
  #include <linux/swap.h>
  #include <linux/writeback.h>
faccbd4b2   Andrew Morton   [PATCH] io-accoun...
28
  #include <linux/task_io_accounting_ops.h>
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
29
30
  #include <linux/interrupt.h>
  #include <linux/cpu.h>
2056a782f   Jens Axboe   [PATCH] Block que...
31
  #include <linux/blktrace_api.h>
c17bb4951   Akinobu Mita   [PATCH] fault-inj...
32
  #include <linux/fault-inject.h>
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
33

8324aa91d   Jens Axboe   block: split tag ...
34
  #include "blk.h"
165125e1e   Jens Axboe   [BLOCK] Get rid o...
35
  static int __make_request(struct request_queue *q, struct bio *bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
36
37
38
39
  
  /*
   * For the allocated request tables
   */
8324aa91d   Jens Axboe   block: split tag ...
40
  struct kmem_cache *request_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
41
42
43
44
  
  /*
   * For queue allocation
   */
6728cb0e6   Jens Axboe   block: make core ...
45
  struct kmem_cache *blk_requestq_cachep;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
46
47
  
  /*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
48
49
   * Controlling structure to kblockd
   */
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
50
  static struct workqueue_struct *kblockd_workqueue;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
51

ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
52
  static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
26b8256e2   Jens Axboe   block: get rid of...
53
54
55
56
57
58
59
60
61
62
63
64
65
66
  /*
   * Per-disk statistics accounting for @rq.  A merged request only bumps
   * the merge counter for its data direction; genuinely new I/O updates
   * the disk's round statistics and its in-flight count.
   * Non-fs requests and requests without a disk are ignored.
   */
  static void drive_stat_acct(struct request *rq, int new_io)
  {
  	const int rw = rq_data_dir(rq);
  
  	if (!rq->rq_disk || !blk_fs_request(rq))
  		return;
  
  	if (new_io) {
  		disk_round_stats(rq->rq_disk);
  		rq->rq_disk->in_flight++;
  	} else {
  		__disk_stat_inc(rq->rq_disk, merges[rw]);
  	}
  }
8324aa91d   Jens Axboe   block: split tag ...
67
  /*
   * Recompute the queue's congestion on/off watermarks from its current
   * nr_requests setting.  The two thresholds are kept apart (hysteresis)
   * so the congested state does not flap on every allocation/free.
   */
  void blk_queue_congestion_threshold(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
68
69
70
71
72
73
74
75
76
77
78
79
80
  {
  	int nr;
  
  	/* congestion switches on just below the full queue depth */
  	nr = q->nr_requests - (q->nr_requests / 8) + 1;
  	if (nr > q->nr_requests)
  		nr = q->nr_requests;
  	q->nr_congestion_on = nr;
  
  	/* and switches off noticeably lower, clamped to at least 1 */
  	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
  	if (nr < 1)
  		nr = 1;
  	q->nr_congestion_off = nr;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
81
82
83
84
85
86
87
88
89
90
91
92
  /**
   * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
   * @bdev:	device
   *
   * Locates the passed device's request queue and returns the address of its
   * backing_dev_info
   *
   * Will return NULL if the request queue cannot be located.
   */
  struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
  {
  	struct backing_dev_info *ret = NULL;
165125e1e   Jens Axboe   [BLOCK] Get rid o...
93
  	struct request_queue *q = bdev_get_queue(bdev);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
94
95
96
97
98
  
  	/* a bdev with no attached queue yields NULL, as documented above */
  	if (q)
  		ret = &q->backing_dev_info;
  	return ret;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
99
  EXPORT_SYMBOL(blk_get_backing_dev_info);
86db1e297   Jens Axboe   block: continue l...
100
  /*
   * Reset a request to a clean state before it is handed out: clears the
   * list linkage, bio pointers, elevator hash/rbtree nodes and every
   * per-use field a previous owner may have set.
   */
  void rq_init(struct request_queue *q, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
101
102
  {
  	INIT_LIST_HEAD(&rq->queuelist);
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
103
  	INIT_LIST_HEAD(&rq->donelist);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
104
105
  
  	rq->errors = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
106
  	rq->bio = rq->biotail = NULL;
2e662b65f   Jens Axboe   [PATCH] elevator:...
107
108
  	INIT_HLIST_NODE(&rq->hash);
  	RB_CLEAR_NODE(&rq->rb_node);
22e2c507c   Jens Axboe   [PATCH] Update cf...
109
  	rq->ioprio = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
110
111
112
  	rq->buffer = NULL;
  	rq->ref_count = 1;
  	rq->q = q;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
113
114
115
  	rq->special = NULL;
  	rq->data_len = 0;
  	rq->data = NULL;
df46b9a44   Mike Christie   [PATCH] Add blk_r...
116
  	rq->nr_phys_segments = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
117
118
119
  	rq->sense = NULL;
  	rq->end_io = NULL;
  	rq->end_io_data = NULL;
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
120
  	rq->completion_data = NULL;
abae1fde6   FUJITA Tomonori   add a struct requ...
121
  	rq->next_rq = NULL;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
122
  }
5bb23a688   NeilBrown   Don't decrement b...
123
124
  /*
   * Complete @nbytes of @bio on behalf of @rq.  For ordinary requests the
   * bio's size/sector are advanced and bio_endio() runs once it is fully
   * done; for the queue's barrier request only the first error is
   * recorded in q->orderr.
   */
  static void req_bio_endio(struct request *rq, struct bio *bio,
  			  unsigned int nbytes, int error)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
125
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
126
  	struct request_queue *q = rq->q;
797e7dbbe   Tejun Heo   [BLOCK] reimpleme...
127

5bb23a688   NeilBrown   Don't decrement b...
128
129
130
131
132
  	if (&q->bar_rq != rq) {
  		if (error)
  			clear_bit(BIO_UPTODATE, &bio->bi_flags);
  		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
  			error = -EIO;
797e7dbbe   Tejun Heo   [BLOCK] reimpleme...
133

5bb23a688   NeilBrown   Don't decrement b...
134
  		/* clamp bogus completion counts instead of underflowing bi_size */
  		if (unlikely(nbytes > bio->bi_size)) {
6728cb0e6   Jens Axboe   block: make core ...
135
136
  			printk(KERN_ERR "%s: want %u bytes done, %u left
  ",
5bb23a688   NeilBrown   Don't decrement b...
137
138
139
  			       __FUNCTION__, nbytes, bio->bi_size);
  			nbytes = bio->bi_size;
  		}
797e7dbbe   Tejun Heo   [BLOCK] reimpleme...
140

5bb23a688   NeilBrown   Don't decrement b...
141
142
143
  		bio->bi_size -= nbytes;
  		bio->bi_sector += (nbytes >> 9);
  		if (bio->bi_size == 0)
6712ecf8f   NeilBrown   Drop 'size' argum...
144
  			bio_endio(bio, error);
5bb23a688   NeilBrown   Don't decrement b...
145
146
147
148
149
150
151
152
153
  	} else {
  
  		/*
  		 * Okay, this is the barrier request in progress, just
  		 * record the error;
  		 */
  		if (error && !q->orderr)
  			q->orderr = error;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
154
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
155

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
156
157
158
  /*
   * Dump a request's identifying fields to the kernel log for debugging:
   * type/flags, sector counts, bio/buffer pointers and, for SCSI
   * passthrough (pc) requests, the raw command bytes.
   */
  void blk_dump_rq_flags(struct request *rq, char *msg)
  {
  	int bit;
6728cb0e6   Jens Axboe   block: make core ...
159
160
  	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x
  ", msg,
4aff5e233   Jens Axboe   [PATCH] Split str...
161
162
  		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
  		rq->cmd_flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
163

6728cb0e6   Jens Axboe   block: make core ...
164
165
166
167
168
169
170
171
172
173
  	printk(KERN_INFO "  sector %llu, nr/cnr %lu/%u
  ",
  						(unsigned long long)rq->sector,
  						rq->nr_sectors,
  						rq->current_nr_sectors);
  	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, data %p, len %u
  ",
  						rq->bio, rq->biotail,
  						rq->buffer, rq->data,
  						rq->data_len);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
174

4aff5e233   Jens Axboe   [PATCH] Split str...
175
  	if (blk_pc_request(rq)) {
6728cb0e6   Jens Axboe   block: make core ...
176
  		printk(KERN_INFO "  cdb: ");
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
177
178
179
180
181
182
  		for (bit = 0; bit < sizeof(rq->cmd); bit++)
  			printk("%02x ", rq->cmd[bit]);
  		printk("
  ");
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
183
  EXPORT_SYMBOL(blk_dump_rq_flags);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
184
185
186
187
188
189
190
191
  /*
   * "plug" the device if there are no outstanding requests: this will
   * force the transfer to start only after we have put all the requests
   * on the list.
   *
   * This is called with interrupts off and no requests on the queue and
   * with the queue lock held.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
192
  void blk_plug_device(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
193
194
195
196
197
198
199
  {
  	WARN_ON(!irqs_disabled());
  
  	/*
  	 * don't plug a stopped queue, it must be paired with blk_start_queue()
  	 * which will restart the queueing
  	 */
7daac4902   Coywolf Qi Hunt   [patch] cleanup: ...
200
  	if (blk_queue_stopped(q))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
201
  		return;
2056a782f   Jens Axboe   [PATCH] Block que...
202
  	/* arm the unplug timer only on the unplugged->plugged transition */
  	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
203
  		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
2056a782f   Jens Axboe   [PATCH] Block que...
204
205
  		blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
206
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
207
208
209
210
211
212
  EXPORT_SYMBOL(blk_plug_device);
  
  /*
   * remove the queue from the plugged list, if present. called with
   * queue lock held and interrupts disabled.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
213
  /*
   * Returns 1 if the queue was actually plugged (and the pending unplug
   * timer has been cancelled), 0 if it was not plugged.
   */
  int blk_remove_plug(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
214
215
216
217
218
219
220
221
222
  {
  	WARN_ON(!irqs_disabled());
  
  	if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
  		return 0;
  
  	del_timer(&q->unplug_timer);
  	return 1;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
223
224
225
226
227
  EXPORT_SYMBOL(blk_remove_plug);
  
  /*
   * remove the plug and let it rip..
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
228
  /*
   * Unplug with the queue lock held and irqs off: if the queue was
   * plugged (and is not stopped), run its request_fn directly.
   */
  void __generic_unplug_device(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
229
  {
7daac4902   Coywolf Qi Hunt   [patch] cleanup: ...
230
  	if (unlikely(blk_queue_stopped(q)))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
231
232
233
234
  		return;
  
  	if (!blk_remove_plug(q))
  		return;
22e2c507c   Jens Axboe   [PATCH] Update cf...
235
  	q->request_fn(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
236
237
238
239
240
  }
  EXPORT_SYMBOL(__generic_unplug_device);
  
  /**
   * generic_unplug_device - fire a request queue
165125e1e   Jens Axboe   [BLOCK] Get rid o...
241
   * @q:    The &struct request_queue in question
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
242
243
244
245
246
247
248
249
   *
   * Description:
   *   Linux uses plugging to build bigger requests queues before letting
   *   the device have at them. If a queue is plugged, the I/O scheduler
   *   is still adding and merging requests on the queue. Once the queue
   *   gets unplugged, the request_fn defined for the queue is invoked and
   *   transfers started.
   **/
165125e1e   Jens Axboe   [BLOCK] Get rid o...
250
  void generic_unplug_device(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
251
252
253
254
255
256
257
258
259
260
  {
  	/* caller does not hold the queue lock; take it and do the real work */
  	spin_lock_irq(q->queue_lock);
  	__generic_unplug_device(q);
  	spin_unlock_irq(q->queue_lock);
  }
  EXPORT_SYMBOL(generic_unplug_device);
  
  /*
   * backing_dev_info ->unplug_io_fn callback: recover the queue from the
   * bdi's private data and unplug it.  @page is unused here.
   */
  static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
  				   struct page *page)
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
261
  	struct request_queue *q = bdi->unplug_io_data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
262

2ad8b1ef1   Alan D. Brunelle   Add UNPLUG traces...
263
  	blk_unplug(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
264
  }
86db1e297   Jens Axboe   block: continue l...
265
  /*
   * kblockd work handler: emit an unplug-I/O trace event and run the
   * queue's unplug function from process context.
   */
  void blk_unplug_work(struct work_struct *work)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
266
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
267
268
  	struct request_queue *q =
  		container_of(work, struct request_queue, unplug_work);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
269

2056a782f   Jens Axboe   [PATCH] Block que...
270
271
  	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
  				q->rq.count[READ] + q->rq.count[WRITE]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
272
273
  	q->unplug_fn(q);
  }
86db1e297   Jens Axboe   block: continue l...
274
  /*
   * unplug_timer expiry handler: trace the event and defer the actual
   * unplug to kblockd, since we are running in timer context here.
   */
  void blk_unplug_timeout(unsigned long data)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
275
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
276
  	struct request_queue *q = (struct request_queue *)data;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
277

2056a782f   Jens Axboe   [PATCH] Block que...
278
279
  	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
  				q->rq.count[READ] + q->rq.count[WRITE]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
280
281
  	kblockd_schedule_work(&q->unplug_work);
  }
2ad8b1ef1   Alan D. Brunelle   Add UNPLUG traces...
282
283
284
285
286
287
288
289
290
291
292
293
294
  /*
   * Kick a queue: if it has an ->unplug_fn, emit an unplug-I/O trace
   * event with the current request counts and invoke the handler.
   */
  void blk_unplug(struct request_queue *q)
  {
  	/*
  	 * devices don't necessarily have an ->unplug_fn defined
  	 */
  	if (!q->unplug_fn)
  		return;
  
  	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
  				q->rq.count[READ] + q->rq.count[WRITE]);
  
  	q->unplug_fn(q);
  }
  EXPORT_SYMBOL(blk_unplug);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
295
296
  /**
   * blk_start_queue - restart a previously stopped queue
165125e1e   Jens Axboe   [BLOCK] Get rid o...
297
   * @q:    The &struct request_queue in question
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
298
299
300
301
302
303
   *
   * Description:
   *   blk_start_queue() will clear the stop flag on the queue, and call
   *   the request_fn for the queue if it was in a stopped state when
   *   entered. Also see blk_stop_queue(). Queue lock must be held.
   **/
165125e1e   Jens Axboe   [BLOCK] Get rid o...
304
  void blk_start_queue(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
305
  {
a038e2536   Paolo 'Blaisorblade' Giarrusso   [PATCH] blk_start...
306
  	WARN_ON(!irqs_disabled());
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
307
308
309
310
311
312
313
314
315
316
317
318
319
320
  	clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
  
  	/*
  	 * one level of recursion is ok and is much faster than kicking
  	 * the unplug handling
  	 */
  	if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
  		q->request_fn(q);
  		clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
  	} else {
  		/* already inside request_fn: punt to kblockd instead of recursing */
  		blk_plug_device(q);
  		kblockd_schedule_work(&q->unplug_work);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
321
322
323
324
  EXPORT_SYMBOL(blk_start_queue);
  
  /**
   * blk_stop_queue - stop a queue
165125e1e   Jens Axboe   [BLOCK] Get rid o...
325
   * @q:    The &struct request_queue in question
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
326
327
328
329
330
331
332
333
334
335
336
   *
   * Description:
   *   The Linux block layer assumes that a block driver will consume all
   *   entries on the request queue when the request_fn strategy is called.
   *   Often this will not happen, because of hardware limitations (queue
   *   depth settings). If a device driver gets a 'queue full' response,
   *   or if it simply chooses not to queue more I/O at one point, it can
   *   call this function to prevent the request_fn from being called until
   *   the driver has signalled it's ready to go again. This happens by calling
   *   blk_start_queue() to restart queue operations. Queue lock must be held.
   **/
165125e1e   Jens Axboe   [BLOCK] Get rid o...
337
  void blk_stop_queue(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
  {
  	/* cancel any pending unplug first, then flag the queue stopped */
  	blk_remove_plug(q);
  	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
  }
  EXPORT_SYMBOL(blk_stop_queue);
  
  /**
   * blk_sync_queue - cancel any pending callbacks on a queue
   * @q: the queue
   *
   * Description:
   *     The block layer may perform asynchronous callback activity
   *     on a queue, such as calling the unplug function after a timeout.
   *     A block device may call blk_sync_queue to ensure that any
   *     such activity is cancelled, thus allowing it to release resources
59c51591a   Michael Opdenacker   Fix occurrences o...
353
   *     that the callbacks might use. The caller must already have made sure
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
354
355
356
357
358
359
360
   *     that its ->make_request_fn will not re-add plugging prior to calling
   *     this function.
   *
   */
  void blk_sync_queue(struct request_queue *q)
  {
  	/* wait out a pending unplug timer, then any queued kblockd work */
  	del_timer_sync(&q->unplug_timer);
abbeb88d0   Oleg Nesterov   blk_sync_queue() ...
361
  	kblockd_flush_work(&q->unplug_work);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
362
363
364
365
366
367
368
369
370
371
372
373
374
  }
  EXPORT_SYMBOL(blk_sync_queue);
  
  /**
   * blk_run_queue - run a single device queue
   * @q:	The queue to run
   */
  void blk_run_queue(struct request_queue *q)
  {
  	unsigned long flags;
  
  	spin_lock_irqsave(q->queue_lock, flags);
  	/* we are about to run the queue directly; drop the plug ourselves */
  	blk_remove_plug(q);
dac07ec12   Jens Axboe   [BLOCK] limit req...
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  
  	/*
  	 * Only recurse once to avoid overrunning the stack, let the unplug
  	 * handling reinvoke the handler shortly if we already got there.
  	 */
  	if (!elv_queue_empty(q)) {
  		if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
  			q->request_fn(q);
  			clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
  		} else {
  			blk_plug_device(q);
  			kblockd_schedule_work(&q->unplug_work);
  		}
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
389
390
391
  	spin_unlock_irqrestore(q->queue_lock, flags);
  }
  EXPORT_SYMBOL(blk_run_queue);
165125e1e   Jens Axboe   [BLOCK] Get rid o...
392
  /*
   * Drop a reference on the queue; the queue is released through its
   * kobject's release method once the last reference is gone.
   */
  void blk_put_queue(struct request_queue *q)
483f4afc4   Al Viro   [PATCH] fix sysfs...
393
394
395
396
  {
  	kobject_put(&q->kobj);
  }
  EXPORT_SYMBOL(blk_put_queue);
6728cb0e6   Jens Axboe   block: make core ...
397
  /*
   * Shut a queue down: mark it dead under sysfs_lock, tear down the
   * elevator if one is attached, and drop the caller's reference.
   */
  void blk_cleanup_queue(struct request_queue *q)
483f4afc4   Al Viro   [PATCH] fix sysfs...
398
399
400
401
402
403
404
405
406
407
  {
  	mutex_lock(&q->sysfs_lock);
  	set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
  	mutex_unlock(&q->sysfs_lock);
  
  	if (q->elevator)
  		elevator_exit(q->elevator);
  
  	blk_put_queue(q);
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
408
  EXPORT_SYMBOL(blk_cleanup_queue);
165125e1e   Jens Axboe   [BLOCK] Get rid o...
409
  /*
   * Initialise the queue's request_list: zero the per-direction counters,
   * set up the wait queues and create the node-local request mempool.
   * Returns 0 on success, -ENOMEM if the mempool cannot be created.
   */
  static int blk_init_free_list(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
410
411
412
413
414
  {
  	struct request_list *rl = &q->rq;
  
  	rl->count[READ] = rl->count[WRITE] = 0;
  	rl->starved[READ] = rl->starved[WRITE] = 0;
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
415
  	rl->elvpriv = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
416
417
  	init_waitqueue_head(&rl->wait[READ]);
  	init_waitqueue_head(&rl->wait[WRITE]);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
418

1946089a1   Christoph Lameter   [PATCH] NUMA awar...
419
420
  	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
  				mempool_free_slab, request_cachep, q->node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
421
422
423
424
425
426
  
  	if (!rl->rq_pool)
  		return -ENOMEM;
  
  	return 0;
  }
165125e1e   Jens Axboe   [BLOCK] Get rid o...
427
  /* Allocate a queue with no NUMA node preference (node -1). */
  struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
428
  {
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
429
430
431
  	return blk_alloc_queue_node(gfp_mask, -1);
  }
  EXPORT_SYMBOL(blk_alloc_queue);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
432

165125e1e   Jens Axboe   [BLOCK] Get rid o...
433
  /*
   * Allocate and minimally initialise a request_queue from the slab cache
   * on @node_id: zeroed memory, backing_dev_info with our unplug hook,
   * unplug timer, kobject and sysfs mutex.  Returns NULL if either the
   * slab allocation or bdi_init() fails.
   */
  struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
434
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
435
  	struct request_queue *q;
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
436
  	int err;
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
437

8324aa91d   Jens Axboe   block: split tag ...
438
  	q = kmem_cache_alloc_node(blk_requestq_cachep,
94f6030ca   Christoph Lameter   Slab allocators: ...
439
  				gfp_mask | __GFP_ZERO, node_id);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
440
441
  	if (!q)
  		return NULL;
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
442
443
444
445
  	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
  	q->backing_dev_info.unplug_io_data = q;
  	err = bdi_init(&q->backing_dev_info);
  	if (err) {
8324aa91d   Jens Axboe   block: split tag ...
446
  		kmem_cache_free(blk_requestq_cachep, q);
e0bf68dde   Peter Zijlstra   mm: bdi init hooks
447
448
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
449
  	init_timer(&q->unplug_timer);
483f4afc4   Al Viro   [PATCH] fix sysfs...
450

8324aa91d   Jens Axboe   block: split tag ...
451
  	kobject_init(&q->kobj, &blk_queue_ktype);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
452

483f4afc4   Al Viro   [PATCH] fix sysfs...
453
  	mutex_init(&q->sysfs_lock);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
454
455
  	return q;
  }
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
456
  EXPORT_SYMBOL(blk_alloc_queue_node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
  
  /**
   * blk_init_queue  - prepare a request queue for use with a block device
   * @rfn:  The function to be called to process requests that have been
   *        placed on the queue.
   * @lock: Request queue spin lock
   *
   * Description:
   *    If a block device wishes to use the standard request handling procedures,
   *    which sorts requests and coalesces adjacent requests, then it must
   *    call blk_init_queue().  The function @rfn will be called when there
   *    are requests on the queue that need to be processed.  If the device
   *    supports plugging, then @rfn may not be called immediately when requests
   *    are available on the queue, but may be called at some time later instead.
   *    Plugged queues are generally unplugged when a buffer belonging to one
   *    of the requests on the queue is needed, or due to memory pressure.
   *
   *    @rfn is not required, or even expected, to remove all requests off the
   *    queue, but only as many as it can handle at a time.  If it does leave
   *    requests on the queue, it is responsible for arranging that the requests
   *    get dealt with eventually.
   *
   *    The queue spin lock must be held while manipulating the requests on the
a038e2536   Paolo 'Blaisorblade' Giarrusso   [PATCH] blk_start...
480
481
   *    request queue; this lock will be taken also from interrupt context, so irq
   *    disabling is needed for it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
482
483
484
485
486
487
488
489
   *
   *    Function returns a pointer to the initialized request queue, or NULL if
   *    it didn't succeed.
   *
   * Note:
   *    blk_init_queue() must be paired with a blk_cleanup_queue() call
   *    when the block device is deactivated (such as at module unload).
   **/
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
490

165125e1e   Jens Axboe   [BLOCK] Get rid o...
491
  /* See the kernel-doc above; NUMA-agnostic wrapper (node -1). */
  struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
492
  {
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
493
494
495
  	return blk_init_queue_node(rfn, lock, -1);
  }
  EXPORT_SYMBOL(blk_init_queue);
165125e1e   Jens Axboe   [BLOCK] Get rid o...
496
  /*
   * Node-aware body of blk_init_queue(): allocates the queue, sets up the
   * request free list, installs default limits/callbacks and attaches the
   * default elevator.  Returns NULL on any failure.
   */
  struct request_queue *
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
497
498
  blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
  {
165125e1e   Jens Axboe   [BLOCK] Get rid o...
499
  	struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
500
501
502
  
  	if (!q)
  		return NULL;
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
503
  	q->node = node_id;
8669aafdb   Al Viro   [PATCH] fix doubl...
504
  	if (blk_init_free_list(q)) {
8324aa91d   Jens Axboe   block: split tag ...
505
  		kmem_cache_free(blk_requestq_cachep, q);
8669aafdb   Al Viro   [PATCH] fix doubl...
506
507
  		return NULL;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
508

152587deb   Jens Axboe   [PATCH] fix NMI l...
509
510
511
512
513
514
515
516
  	/*
  	 * if caller didn't supply a lock, they get per-queue locking with
  	 * our embedded lock
  	 */
  	if (!lock) {
  		spin_lock_init(&q->__queue_lock);
  		lock = &q->__queue_lock;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
517
  	q->request_fn		= rfn;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
518
519
520
521
522
523
524
525
526
527
528
529
  	q->prep_rq_fn		= NULL;
  	q->unplug_fn		= generic_unplug_device;
  	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
  	q->queue_lock		= lock;
  
  	blk_queue_segment_boundary(q, 0xffffffff);
  
  	blk_queue_make_request(q, __make_request);
  	blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
  
  	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
  	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
44ec95425   Alan Stern   [SCSI] sg: cap re...
530
  	q->sg_reserved_size = INT_MAX;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
531
532
533
534
535
536
537
  	/*
  	 * all done
  	 */
  	if (!elevator_init(q, NULL)) {
  		blk_queue_congestion_threshold(q);
  		return q;
  	}
8669aafdb   Al Viro   [PATCH] fix doubl...
538
  	/* elevator init failed: release the queue via its refcount */
  	blk_put_queue(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
539
540
  	return NULL;
  }
1946089a1   Christoph Lameter   [PATCH] NUMA awar...
541
  EXPORT_SYMBOL(blk_init_queue_node);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
542

165125e1e   Jens Axboe   [BLOCK] Get rid o...
543
  /*
   * Take a reference on a live queue.  Returns 0 on success, 1 if the
   * queue is already marked dead (no reference is taken).
   */
  int blk_get_queue(struct request_queue *q)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
544
  {
fde6ad224   Nick Piggin   [PATCH] blk: bran...
545
  	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
483f4afc4   Al Viro   [PATCH] fix sysfs...
546
  		kobject_get(&q->kobj);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
547
548
549
550
551
  		return 0;
  	}
  
  	return 1;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
552
  EXPORT_SYMBOL(blk_get_queue);
165125e1e   Jens Axboe   [BLOCK] Get rid o...
553
  /*
   * Return a request to the queue's mempool, releasing elevator-private
   * data first if the request carried any (REQ_ELVPRIV).
   */
  static inline void blk_free_request(struct request_queue *q, struct request *rq)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
554
  {
4aff5e233   Jens Axboe   [PATCH] Split str...
555
  	if (rq->cmd_flags & REQ_ELVPRIV)
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
556
  		elv_put_request(q, rq);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
557
558
  	mempool_free(rq, q->rq.rq_pool);
  }
1ea25ecb7   Jens Axboe   [PATCH] Audit blo...
559
  /*
   * Allocate a request from the queue's mempool and, when @priv is set,
   * attach elevator-private data (marking the request REQ_ELVPRIV).
   * Returns NULL if either the mempool or the elevator allocation fails;
   * the mempool allocation is released again on the latter.
   */
  static struct request *
165125e1e   Jens Axboe   [BLOCK] Get rid o...
560
  blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
561
562
563
564
565
566
567
  {
  	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
  
  	if (!rq)
  		return NULL;
  
  	/*
4aff5e233   Jens Axboe   [PATCH] Split str...
568
  	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
569
570
  	 * see bio.h and blkdev.h
  	 */
49171e5c6   Jens Axboe   [PATCH] Remove st...
571
  	rq->cmd_flags = rw | REQ_ALLOCED;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
572

cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
573
  	if (priv) {
cb78b285c   Jens Axboe   [PATCH] Drop usel...
574
  		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
575
576
577
  			mempool_free(rq, q->rq.rq_pool);
  			return NULL;
  		}
4aff5e233   Jens Axboe   [PATCH] Split str...
578
  		rq->cmd_flags |= REQ_ELVPRIV;
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
579
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
580

cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
581
  	return rq;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
582
583
584
585
586
587
  }
  
  /*
   * ioc_batching returns true if the ioc is a valid batching request and
   * should be given priority access to a request.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
588
  static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
  {
  	if (!ioc)
  		return 0;
  
  	/*
  	 * Make sure the process is able to allocate at least 1 request
  	 * even if the batch times out, otherwise we could theoretically
  	 * lose wakeups.
  	 */
  	/* still a batcher: quota untouched, or quota left and we are within
  	 * BLK_BATCH_TIME of the last wait */
  	return ioc->nr_batch_requests == q->nr_batching ||
  		(ioc->nr_batch_requests > 0
  		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
  }
  
  /*
   * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
   * will cause the process to be a "batcher" on all queues in the system. This
   * is the behaviour we want though - once it gets a wakeup it should be given
   * a nice run.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
609
  static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
610
611
612
613
614
615
616
  {
  	if (!ioc || ioc_batching(q, ioc))
  		return;
  
  	/* grant a fresh batch quota and start the batch window now */
  	ioc->nr_batch_requests = q->nr_batching;
  	ioc->last_waited = jiffies;
  }
165125e1e   Jens Axboe   [BLOCK] Get rid o...
617
  /*
   * One direction (@rw) just gave back a request: clear the congested /
   * full state as counts drop below the thresholds, and wake any tasks
   * waiting for a free slot.  Called under q->queue_lock.
   */
  static void __freed_request(struct request_queue *q, int rw)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
618
619
620
621
  {
  	struct request_list *rl = &q->rq;
  
  	if (rl->count[rw] < queue_congestion_off_threshold(q))
79e2de4bc   Thomas Maier   [PATCH] export cl...
622
  		blk_clear_queue_congested(q, rw);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
623
624
  
  	if (rl->count[rw] + 1 <= q->nr_requests) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
625
626
627
628
629
630
631
632
633
634
635
  		if (waitqueue_active(&rl->wait[rw]))
  			wake_up(&rl->wait[rw]);
  
  		blk_clear_queue_full(q, rw);
  	}
  }
  
  /*
   * A request has just been released.  Account for it, update the full and
   * congestion status, wake up any waiters.   Called under q->queue_lock.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
636
  static void freed_request(struct request_queue *q, int rw, int priv)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
637
638
639
640
  {
  	struct request_list *rl = &q->rq;
  
  	rl->count[rw]--;
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
641
642
  	if (priv)
  		rl->elvpriv--;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
643
644
645
646
647
  
  	__freed_request(q, rw);
  
  	/* the opposite direction may have starved while we held requests */
  	if (unlikely(rl->starved[rw ^ 1]))
  		__freed_request(q, rw ^ 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
648
649
650
651
  }
  
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 *
 * @rw_flags: request direction plus flags (REQ_RW_SYNC may be set);
 *            only bit 0 (the data direction) selects the request pool.
 * @bio:      bio being allocated for, may be NULL (blk_get_request path);
 *            only used for tracing here.
 * @gfp_mask: allocation mask forwarded to blk_alloc_request().
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const int rw = rw_flags & 0x01;
	int may_queue, priv;

	may_queue = elv_may_queue(q, rw_flags);
	/* Elevator vetoed the allocation; still record starvation below. */
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[rw]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_queue_full(q, rw)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, rw);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, rw);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[rw] >= (3 * q->nr_requests / 2))
		goto out;

	rl->count[rw]++;
	rl->starved[rw] = 0;

	/* Elevator-private data only while no elevator switch is in flight. */
	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	/* Drop the lock for the (possibly sleeping) allocation. */
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw, priv);

		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction was pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
rq_starved:
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		goto out;
	}

	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	rq_init(q, rq);

	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
	/* NULL return: queue_lock still held.  Non-NULL: lock dropped. */
	return rq;
}
  
/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const int rw = rw_flags & 0x01;
	struct request *rq;

	/* Fast path: may succeed immediately (returns with lock dropped). */
	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct request_list *rl = &q->rq;

		/* Exclusive wait: freed_request wakes us one at a time. */
		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
				TASK_UNINTERRUPTIBLE);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);

		if (!rq) {
			struct io_context *ioc;

			blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);

			/* Kick the queue so requests complete and free up. */
			__generic_unplug_device(q);
			spin_unlock_irq(q->queue_lock);
			io_schedule();

			/*
			 * After sleeping, we become a "batching" process and
			 * will be able to allocate at least one request, and
			 * up to a big batch of them for a small period time.
			 * See ioc_batching, ioc_set_batching
			 */
			ioc = current_io_context(GFP_NOIO, q->node);
			ioc_set_batching(q, ioc);

			spin_lock_irq(q->queue_lock);
		}
		finish_wait(&rl->wait[rw], &wait);
	}

	return rq;
}
165125e1e   Jens Axboe   [BLOCK] Get rid o...
792
  struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
793
794
795
796
  {
  	struct request *rq;
  
  	BUG_ON(rw != READ && rw != WRITE);
d6344532a   Nick Piggin   [PATCH] blk: redu...
797
798
  	spin_lock_irq(q->queue_lock);
  	if (gfp_mask & __GFP_WAIT) {
22e2c507c   Jens Axboe   [PATCH] Update cf...
799
  		rq = get_request_wait(q, rw, NULL);
d6344532a   Nick Piggin   [PATCH] blk: redu...
800
  	} else {
22e2c507c   Jens Axboe   [PATCH] Update cf...
801
  		rq = get_request(q, rw, NULL, gfp_mask);
d6344532a   Nick Piggin   [PATCH] blk: redu...
802
803
804
805
  		if (!rq)
  			spin_unlock_irq(q->queue_lock);
  	}
  	/* q->queue_lock is unlocked at this point */
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
806
807
808
  
  	return rq;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
809
810
811
  EXPORT_SYMBOL(blk_get_request);
  
  /**
dc72ef4ae   Jens Axboe   [PATCH] Add blk_s...
812
813
814
815
816
817
818
819
820
   * blk_start_queueing - initiate dispatch of requests to device
   * @q:		request queue to kick into gear
   *
   * This is basically a helper to remove the need to know whether a queue
   * is plugged or not if someone just wants to initiate dispatch of requests
   * for this queue.
   *
   * The queue lock must be held with interrupts disabled.
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
821
  void blk_start_queueing(struct request_queue *q)
dc72ef4ae   Jens Axboe   [PATCH] Add blk_s...
822
823
824
825
826
827
828
829
830
  {
  	if (!blk_queue_plugged(q))
  		q->request_fn(q);
  	else
  		__generic_unplug_device(q);
  }
  EXPORT_SYMBOL(blk_start_queueing);
  
/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);

	/* Release the hardware tag before handing back to the elevator. */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);
  
/**
 * blk_insert_request - insert a special request in to a request queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 * @at_head:	insert request at head or tail of queue
 * @data:	private data
 *
 * Description:
 *    Many block devices need to execute commands asynchronously, so they don't
 *    block the whole kernel from preemption during request execution.  This is
 *    accomplished normally by inserting aritficial requests tagged as
 *    REQ_SPECIAL in to the corresponding request queue, and letting them be
 *    scheduled for actual execution by the request queue.
 *
 *    We have the option of inserting the head or the tail of the queue.
 *    Typically we use the tail for new ioctls and so forth.  We use the head
 *    of the queue for things like a QUEUE_FULL message from a device, or a
 *    host that is unable to accept a particular command.
 */
void blk_insert_request(struct request_queue *q, struct request *rq,
			int at_head, void *data)
{
	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
	unsigned long flags;

	/*
	 * tell I/O scheduler that this isn't a regular read/write (ie it
	 * must not attempt merges on this) and that it acts as a soft
	 * barrier
	 */
	rq->cmd_type = REQ_TYPE_SPECIAL;
	rq->cmd_flags |= REQ_SOFTBARRIER;

	rq->special = data;

	spin_lock_irqsave(q->queue_lock, flags);

	/*
	 * If command is tagged, release the tag
	 */
	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	drive_stat_acct(rq, 1);
	__elv_add_request(q, rq, where, 0);
	/* Start dispatch immediately rather than waiting for an unplug. */
	blk_start_queueing(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
899
900
901
902
903
  /*
   * add-request adds a request to the linked list.
   * queue lock is held and interrupts disabled, as we muck with the
   * request queue list.
   */
6728cb0e6   Jens Axboe   block: make core ...
904
  static inline void add_request(struct request_queue *q, struct request *req)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
905
  {
b238b3d4b   Jerome Marchand   block layer: remo...
906
  	drive_stat_acct(req, 1);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
907

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
908
909
910
911
912
913
  	/*
  	 * elevator indicated where it wants this request to be
  	 * inserted at elevator_merge time
  	 */
  	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
  }
6728cb0e6   Jens Axboe   block: make core ...
914

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
  /*
   * disk_round_stats()	- Round off the performance stats on a struct
   * disk_stats.
   *
   * The average IO queue length and utilisation statistics are maintained
   * by observing the current state of the queue length and the amount of
   * time it has been in this state for.
   *
   * Normally, that accounting is done on IO completion, but that can result
   * in more than a second's worth of IO being accounted for within any one
   * second, leading to >100% utilisation.  To deal with that, we call this
   * function to do a round-off before returning the results when reading
   * /proc/diskstats.  This accounts immediately for all queue usage up to
   * the current jiffies and restarts the counters again.
   */
  void disk_round_stats(struct gendisk *disk)
  {
  	unsigned long now = jiffies;
b2982649c   Kenneth W Chen   Following the sam...
933
934
  	if (now == disk->stamp)
  		return;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
935

20e5c81fc   Kenneth W Chen   [patch] remove ge...
936
937
938
939
940
  	if (disk->in_flight) {
  		__disk_stat_add(disk, time_in_queue,
  				disk->in_flight * (now - disk->stamp));
  		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
941
  	disk->stamp = now;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
942
  }
3eaf840e0   Jun'ichi "Nick" Nomura   [PATCH] device-ma...
943
  EXPORT_SYMBOL_GPL(disk_round_stats);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
944
945
946
  /*
   * queue lock must be held
   */
165125e1e   Jens Axboe   [BLOCK] Get rid o...
947
  void __blk_put_request(struct request_queue *q, struct request *req)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
948
  {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
949
950
951
952
  	if (unlikely(!q))
  		return;
  	if (unlikely(--req->ref_count))
  		return;
8922e16cf   Tejun Heo   [PATCH] 01/05 Imp...
953
  	elv_completed_request(q, req);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
954
955
956
957
  	/*
  	 * Request may not have originated from ll_rw_blk. if not,
  	 * it didn't come out of our reserved rq pools
  	 */
49171e5c6   Jens Axboe   [PATCH] Remove st...
958
  	if (req->cmd_flags & REQ_ALLOCED) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
959
  		int rw = rq_data_dir(req);
4aff5e233   Jens Axboe   [PATCH] Split str...
960
  		int priv = req->cmd_flags & REQ_ELVPRIV;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
961

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
962
  		BUG_ON(!list_empty(&req->queuelist));
9817064b6   Jens Axboe   [PATCH] elevator:...
963
  		BUG_ON(!hlist_unhashed(&req->hash));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
964
965
  
  		blk_free_request(q, req);
cb98fc8bb   Tejun Heo   [BLOCK] Reimpleme...
966
  		freed_request(q, rw, priv);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
967
968
  	}
  }
6e39b69e7   Mike Christie   [SCSI] export blk...
969
  EXPORT_SYMBOL_GPL(__blk_put_request);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
970
971
  void blk_put_request(struct request *req)
  {
8922e16cf   Tejun Heo   [PATCH] 01/05 Imp...
972
  	unsigned long flags;
165125e1e   Jens Axboe   [BLOCK] Get rid o...
973
  	struct request_queue *q = req->q;
8922e16cf   Tejun Heo   [PATCH] 01/05 Imp...
974

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
975
  	/*
8922e16cf   Tejun Heo   [PATCH] 01/05 Imp...
976
977
  	 * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
  	 * following if (q) test.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
978
  	 */
8922e16cf   Tejun Heo   [PATCH] 01/05 Imp...
979
  	if (q) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
980
981
982
983
984
  		spin_lock_irqsave(q->queue_lock, flags);
  		__blk_put_request(q, req);
  		spin_unlock_irqrestore(q->queue_lock, flags);
  	}
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
985
  EXPORT_SYMBOL(blk_put_request);
86db1e297   Jens Axboe   block: continue l...
986
  void init_request_from_bio(struct request *req, struct bio *bio)
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
987
  {
4aff5e233   Jens Axboe   [PATCH] Split str...
988
  	req->cmd_type = REQ_TYPE_FS;
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
989
990
991
992
993
  
  	/*
  	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
  	 */
  	if (bio_rw_ahead(bio) || bio_failfast(bio))
4aff5e233   Jens Axboe   [PATCH] Split str...
994
  		req->cmd_flags |= REQ_FAILFAST;
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
995
996
997
998
999
  
  	/*
  	 * REQ_BARRIER implies no merging, but lets make it explicit
  	 */
  	if (unlikely(bio_barrier(bio)))
4aff5e233   Jens Axboe   [PATCH] Split str...
1000
  		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1001

b31dc66a5   Jens Axboe   [PATCH] Kill PF_S...
1002
  	if (bio_sync(bio))
4aff5e233   Jens Axboe   [PATCH] Split str...
1003
  		req->cmd_flags |= REQ_RW_SYNC;
5404bc7a8   Jens Axboe   [PATCH] Allow fil...
1004
1005
  	if (bio_rw_meta(bio))
  		req->cmd_flags |= REQ_RW_META;
b31dc66a5   Jens Axboe   [PATCH] Kill PF_S...
1006

52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1007
1008
  	req->errors = 0;
  	req->hard_sector = req->sector = bio->bi_sector;
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1009
  	req->ioprio = bio_prio(bio);
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1010
  	req->start_time = jiffies;
bc1c56fde   NeilBrown   Share code betwee...
1011
  	blk_rq_bio_prep(req->q, req, bio);
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1012
  }
165125e1e   Jens Axboe   [BLOCK] Get rid o...
1013
  static int __make_request(struct request_queue *q, struct bio *bio)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1014
  {
450991bc1   Nick Piggin   [PATCH] blk: __ma...
1015
  	struct request *req;
51da90fcb   Jens Axboe   [PATCH] ll_rw_blk...
1016
1017
1018
  	int el_ret, nr_sectors, barrier, err;
  	const unsigned short prio = bio_prio(bio);
  	const int sync = bio_sync(bio);
7749a8d42   Jens Axboe   [PATCH] Propagate...
1019
  	int rw_flags;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1020

1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1021
  	nr_sectors = bio_sectors(bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1022
1023
1024
1025
1026
1027
1028
  
  	/*
  	 * low level driver can indicate that it wants pages above a
  	 * certain limit bounced to low memory (ie for highmem, or even
  	 * ISA dma in theory)
  	 */
  	blk_queue_bounce(q, &bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1029
  	barrier = bio_barrier(bio);
797e7dbbe   Tejun Heo   [BLOCK] reimpleme...
1030
  	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1031
1032
1033
  		err = -EOPNOTSUPP;
  		goto end_io;
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1034
  	spin_lock_irq(q->queue_lock);
450991bc1   Nick Piggin   [PATCH] blk: __ma...
1035
  	if (unlikely(barrier) || elv_queue_empty(q))
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1036
1037
1038
1039
  		goto get_rq;
  
  	el_ret = elv_merge(q, &req, bio);
  	switch (el_ret) {
6728cb0e6   Jens Axboe   block: make core ...
1040
1041
  	case ELEVATOR_BACK_MERGE:
  		BUG_ON(!rq_mergeable(req));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1042

6728cb0e6   Jens Axboe   block: make core ...
1043
1044
  		if (!ll_back_merge_fn(q, req, bio))
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1045

6728cb0e6   Jens Axboe   block: make core ...
1046
  		blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
2056a782f   Jens Axboe   [PATCH] Block que...
1047

6728cb0e6   Jens Axboe   block: make core ...
1048
1049
1050
1051
1052
1053
1054
1055
  		req->biotail->bi_next = bio;
  		req->biotail = bio;
  		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
  		req->ioprio = ioprio_best(req->ioprio, prio);
  		drive_stat_acct(req, 0);
  		if (!attempt_back_merge(q, req))
  			elv_merged_request(q, req, el_ret);
  		goto out;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1056

6728cb0e6   Jens Axboe   block: make core ...
1057
1058
  	case ELEVATOR_FRONT_MERGE:
  		BUG_ON(!rq_mergeable(req));
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1059

6728cb0e6   Jens Axboe   block: make core ...
1060
1061
  		if (!ll_front_merge_fn(q, req, bio))
  			break;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1062

6728cb0e6   Jens Axboe   block: make core ...
1063
  		blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
2056a782f   Jens Axboe   [PATCH] Block que...
1064

6728cb0e6   Jens Axboe   block: make core ...
1065
1066
  		bio->bi_next = req->bio;
  		req->bio = bio;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1067

6728cb0e6   Jens Axboe   block: make core ...
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
  		/*
  		 * may not be valid. if the low level driver said
  		 * it didn't need a bounce buffer then it better
  		 * not touch req->buffer either...
  		 */
  		req->buffer = bio_data(bio);
  		req->current_nr_sectors = bio_cur_sectors(bio);
  		req->hard_cur_sectors = req->current_nr_sectors;
  		req->sector = req->hard_sector = bio->bi_sector;
  		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
  		req->ioprio = ioprio_best(req->ioprio, prio);
  		drive_stat_acct(req, 0);
  		if (!attempt_front_merge(q, req))
  			elv_merged_request(q, req, el_ret);
  		goto out;
  
  	/* ELV_NO_MERGE: elevator says don't/can't merge. */
  	default:
  		;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1087
  	}
450991bc1   Nick Piggin   [PATCH] blk: __ma...
1088
  get_rq:
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1089
  	/*
7749a8d42   Jens Axboe   [PATCH] Propagate...
1090
1091
1092
1093
1094
1095
1096
1097
1098
  	 * This sync check and mask will be re-done in init_request_from_bio(),
  	 * but we need to set it earlier to expose the sync flag to the
  	 * rq allocator and io schedulers.
  	 */
  	rw_flags = bio_data_dir(bio);
  	if (sync)
  		rw_flags |= REQ_RW_SYNC;
  
  	/*
450991bc1   Nick Piggin   [PATCH] blk: __ma...
1099
  	 * Grab a free request. This is might sleep but can not fail.
d6344532a   Nick Piggin   [PATCH] blk: redu...
1100
  	 * Returns with the queue unlocked.
450991bc1   Nick Piggin   [PATCH] blk: __ma...
1101
  	 */
7749a8d42   Jens Axboe   [PATCH] Propagate...
1102
  	req = get_request_wait(q, rw_flags, bio);
d6344532a   Nick Piggin   [PATCH] blk: redu...
1103

450991bc1   Nick Piggin   [PATCH] blk: __ma...
1104
1105
1106
1107
1108
  	/*
  	 * After dropping the lock and possibly sleeping here, our request
  	 * may now be mergeable after it had proven unmergeable (above).
  	 * We don't worry about that case for efficiency. It won't happen
  	 * often, and the elevators are able to handle it.
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1109
  	 */
52d9e6753   Tejun Heo   [BLOCK] ll_rw_blk...
1110
  	init_request_from_bio(req, bio);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1111

450991bc1   Nick Piggin   [PATCH] blk: __ma...
1112
1113
1114
  	spin_lock_irq(q->queue_lock);
  	if (elv_queue_empty(q))
  		blk_plug_device(q);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1115
1116
  	add_request(q, req);
  out:
4a534f93b   Jens Axboe   [PATCH] possible ...
1117
  	if (sync)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1118
1119
1120
1121
1122
1123
  		__generic_unplug_device(q);
  
  	spin_unlock_irq(q->queue_lock);
  	return 0;
  
  end_io:
6712ecf8f   NeilBrown   Drop 'size' argum...
1124
  	bio_endio(bio, err);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1125
1126
1127
1128
1129
1130
1131
1132
1133
  	return 0;
  }
  
/*
 * If bio->bi_dev is a partition, remap the location
 */
static inline void blk_partition_remap(struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;

	/* bdev == bd_contains means it's already the whole disk. */
	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
		struct hd_struct *p = bdev->bd_part;
		const int rw = bio_data_dir(bio);

		/* Per-partition I/O accounting. */
		p->sectors[rw] += bio_sectors(bio);
		p->ios[rw]++;

		/* Translate to an absolute sector on the parent disk. */
		bio->bi_sector += p->start_sect;
		bio->bi_bdev = bdev->bd_contains;

		blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
				    bdev->bd_dev, bio->bi_sector,
				    bio->bi_sector - p->start_sect);
	}
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
/*
 * Log an access beyond the end of the device and flag the bio (BIO_EOF)
 * so completion paths can tell it was out of range.
 */
static void handle_bad_sector(struct bio *bio)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "attempt to access beyond end of device\n");
	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
			bdevname(bio->bi_bdev, b),
			bio->bi_rw,
			(unsigned long long)bio->bi_sector + bio_sectors(bio),
			(long long)(bio->bi_bdev->bd_inode->i_size >> 9));

	set_bit(BIO_EOF, &bio->bi_flags);
}
c17bb4951   Akinobu Mita   [PATCH] fault-inj...
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
  #ifdef CONFIG_FAIL_MAKE_REQUEST
  
  static DECLARE_FAULT_ATTR(fail_make_request);
  
  static int __init setup_fail_make_request(char *str)
  {
  	return setup_fault_attr(&fail_make_request, str);
  }
  __setup("fail_make_request=", setup_fail_make_request);
  
  static int should_fail_request(struct bio *bio)
  {
  	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
  	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
  		return should_fail(&fail_make_request, bio->bi_size);
  
  	return 0;
  }
  
  static int __init fail_make_request_debugfs(void)
  {
  	return init_fault_attr_dentries(&fail_make_request,
  					"fail_make_request");
  }
  
  late_initcall(fail_make_request_debugfs);
  
  #else /* CONFIG_FAIL_MAKE_REQUEST */
  
  static inline int should_fail_request(struct bio *bio)
  {
  	return 0;
  }
  
  #endif /* CONFIG_FAIL_MAKE_REQUEST */
c07e2b412   Jens Axboe   block: factor our...
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
/*
 * Check whether this bio extends beyond the end of the device.
 * Returns 1 (and logs via handle_bad_sector) if it does, 0 otherwise.
 */
static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
{
	sector_t maxsector;

	/* Zero-length bios (e.g. empty barriers) cannot go out of range. */
	if (!nr_sectors)
		return 0;

	/* Test device or partition size, when known. */
	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_sector;

		/* Two-step compare avoids unsigned underflow on subtract. */
		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * This may well happen - the kernel calls bread()
			 * without checking the size of the device, e.g., when
			 * mounting a device.
			 */
			handle_bad_sector(bio);
			return 1;
		}
	}

	return 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
  /**
   * generic_make_request: hand a buffer to its device driver for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
   * generic_make_request() is used to make I/O requests of block
   * devices. It is passed a &struct bio, which describes the I/O that needs
   * to be done.
   *
   * generic_make_request() does not return any status.  The
   * success/failure status of the request, along with notification of
   * completion, is delivered asynchronously through the bio->bi_end_io
   * function described (one day) else where.
   *
   * The caller of generic_make_request must make sure that bi_io_vec
   * are set to describe the memory buffer, and that bi_dev and bi_sector are
   * set to describe the device address, and the
   * bi_end_io and optionally bi_private are set to describe how
   * completion notification should be signaled.
   *
   * generic_make_request and the drivers it calls may use bi_next if this
   * bio happens to be merged with someone else, and may change bi_dev and
   * bi_sector for remaps as it sees fit.  So the values of these fields
   * should NOT be depended on after the call to generic_make_request.
   */
d89d87965   Neil Brown   When stacked bloc...
1251
/*
 * Core of generic_make_request(): validate @bio, resolve stacking by
 * repeatedly calling ->make_request_fn until a driver accepts the bio
 * (returns 0).  Errors complete the bio via bio_endio(bio, err).
 */
static inline void __generic_make_request(struct bio *bio)
{
	struct request_queue *q;
	sector_t old_sector;
	int ret, nr_sectors = bio_sectors(bio);
	dev_t old_dev;
	int err = -EIO;

	might_sleep();

	if (bio_check_eod(bio, nr_sectors))
		goto end_io;

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 *
	 * NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
	old_sector = -1;
	old_dev = 0;
	do {
		char b[BDEVNAME_SIZE];

		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
				"nonexistent block-device %s (%Lu)\n",
				bdevname(bio->bi_bdev, b),
				(long long) bio->bi_sector);
/* Shared error exit: completes the bio with the current err. */
end_io:
			bio_endio(bio, err);
			break;
		}

		if (unlikely(nr_sectors > q->max_hw_sectors)) {
			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
				bdevname(bio->bi_bdev, b),
				bio_sectors(bio),
				q->max_hw_sectors);
			goto end_io;
		}

		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
			goto end_io;

		/* Fault injection hook (CONFIG_FAIL_MAKE_REQUEST). */
		if (should_fail_request(bio))
			goto end_io;

		/*
		 * If this device has partitions, remap block n
		 * of partition p to block n+start(p) of the disk.
		 */
		blk_partition_remap(bio);

		/* Trace the remap done by the previous loop iteration. */
		if (old_sector != -1)
			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
					    old_sector);

		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);

		old_sector = bio->bi_sector;
		old_dev = bio->bi_bdev->bd_dev;

		/* Re-check: partition remap may have moved the bio. */
		if (bio_check_eod(bio, nr_sectors))
			goto end_io;

		/* Empty barriers need driver flush support. */
		if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
			err = -EOPNOTSUPP;
			goto end_io;
		}

		ret = q->make_request_fn(q, bio);
	} while (ret);
}
d89d87965   Neil Brown   When stacked bloc...
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
/*
 * We only want one ->make_request_fn to be active at a time,
 * else stack usage with stacked devices could be a problem.
 * So use current->bio_{list,tail} to keep a list of requests
 * submitted by a make_request_fn function.
 * current->bio_tail is also used as a flag to say if
 * generic_make_request is currently active in this task or not.
 * If it is NULL, then no make_request is active.  If it is non-NULL,
 * then a make_request is active, and new requests should be added
 * at the tail
 */
void generic_make_request(struct bio *bio)
{
	if (current->bio_tail) {
		/* make_request is active: just queue this bio for the
		 * outer invocation to process, turning recursion into
		 * iteration. */
		*(current->bio_tail) = bio;
		bio->bi_next = NULL;
		current->bio_tail = &bio->bi_next;
		return;
	}
	/* following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to the next (which is NULL) and bio_tail
	 * to &bio_list, thus initialising the bio_list of new bios to be
	 * added.  __generic_make_request may indeed add some more bios
	 * through a recursive call to generic_make_request.  If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top.  In this case we really did just take the bio
	 * of the top of the list (no pretending) and so fixup bio_list and
	 * bio_tail or bi_next, and call into __generic_make_request again.
	 *
	 * The loop was structured like this to make only one call to
	 * __generic_make_request (which is important as it is large and
	 * inlined) and to keep the structure simple.
	 */
	BUG_ON(bio->bi_next);
	do {
		current->bio_list = bio->bi_next;
		if (bio->bi_next == NULL)
			current->bio_tail = &current->bio_list;
		else
			bio->bi_next = NULL;
		__generic_make_request(bio);
		bio = current->bio_list;
	} while (bio);
	current->bio_tail = NULL; /* deactivate */
}
EXPORT_SYMBOL(generic_make_request);
  
/**
 * submit_bio: submit a bio to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work. Both are fairly rough
 * interfaces, @bio must be presetup and ready for I/O.
 *
 */
void submit_bio(int rw, struct bio *bio)
{
	int count = bio_sectors(bio);

	/* Merge the caller's direction/hint bits into the bio. */
	bio->bi_rw |= rw;

	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
	if (!bio_empty_barrier(bio)) {

		BIO_BUG_ON(!bio->bi_size);
		BIO_BUG_ON(!bio->bi_io_vec);

		/* Per-direction VM paging statistics (in sectors). */
		if (rw & WRITE) {
			count_vm_events(PGPGOUT, count);
		} else {
			task_io_account_read(bio->bi_size);
			count_vm_events(PGPGIN, count);
		}

		/* /proc/sys/vm/block_dump debugging aid. */
		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];
			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
			current->comm, task_pid_nr(current),
				(rw & WRITE) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_sector,
				bdevname(bio->bi_bdev, b));
		}
	}

	generic_make_request(bio);
}
EXPORT_SYMBOL(submit_bio);
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1420
1421
1422
  /**
   * __end_that_request_first - end I/O on a request
   * @req:      the request being processed
5450d3e1d   Kiyoshi Ueda   blk_end_request: ...
1423
   * @error:    0 for success, < 0 for error
3bcddeac1   Kiyoshi Ueda   blk_end_request: ...
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
   * @nr_bytes: number of bytes to complete
   *
   * Description:
   *     Ends I/O on a number of bytes attached to @req, and sets it up
   *     for the next range of segments (if any) in the cluster.
   *
   * Return:
   *     0 - we are done with this request, call end_that_request_last()
   *     1 - still buffers pending for this request
   **/
5450d3e1d   Kiyoshi Ueda   blk_end_request: ...
1434
  static int __end_that_request_first(struct request *req, int error,
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1435
1436
  				    int nr_bytes)
  {
5450d3e1d   Kiyoshi Ueda   blk_end_request: ...
1437
  	int total_bytes, bio_nbytes, next_idx = 0;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1438
  	struct bio *bio;
2056a782f   Jens Axboe   [PATCH] Block que...
1439
  	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1440
  	/*
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1441
1442
1443
1444
1445
  	 * for a REQ_BLOCK_PC request, we want to carry any eventual
  	 * sense key with us all the way through
  	 */
  	if (!blk_pc_request(req))
  		req->errors = 0;
6728cb0e6   Jens Axboe   block: make core ...
1446
1447
1448
  	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
  		printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu
  ",
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1449
1450
1451
  				req->rq_disk ? req->rq_disk->disk_name : "?",
  				(unsigned long long)req->sector);
  	}
d72d904a5   Jens Axboe   [BLOCK] Update re...
1452
  	if (blk_fs_request(req) && req->rq_disk) {
a362357b6   Jens Axboe   [BLOCK] Unify the...
1453
  		const int rw = rq_data_dir(req);
53e86061b   Jens Axboe   [BLOCK] ll_rw_blk...
1454
  		disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
d72d904a5   Jens Axboe   [BLOCK] Update re...
1455
  	}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1456
1457
1458
  	total_bytes = bio_nbytes = 0;
  	while ((bio = req->bio) != NULL) {
  		int nbytes;
bf2de6f5a   Jens Axboe   block: Initial su...
1459
1460
1461
1462
1463
1464
1465
  		/*
  		 * For an empty barrier request, the low level driver must
  		 * store a potential error location in ->sector. We pass
  		 * that back up in ->bi_sector.
  		 */
  		if (blk_empty_barrier(req))
  			bio->bi_sector = req->sector;
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1466
1467
1468
  		if (nr_bytes >= bio->bi_size) {
  			req->bio = bio->bi_next;
  			nbytes = bio->bi_size;
5bb23a688   NeilBrown   Don't decrement b...
1469
  			req_bio_endio(req, bio, nbytes, error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1470
1471
1472
1473
1474
1475
1476
  			next_idx = 0;
  			bio_nbytes = 0;
  		} else {
  			int idx = bio->bi_idx + next_idx;
  
  			if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
  				blk_dump_rq_flags(req, "__end_that");
6728cb0e6   Jens Axboe   block: make core ...
1477
1478
1479
1480
  				printk(KERN_ERR "%s: bio idx %d >= vcnt %d
  ",
  						__FUNCTION__, bio->bi_idx,
  						bio->bi_vcnt);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
  				break;
  			}
  
  			nbytes = bio_iovec_idx(bio, idx)->bv_len;
  			BIO_BUG_ON(nbytes > bio->bi_size);
  
  			/*
  			 * not a complete bvec done
  			 */
  			if (unlikely(nbytes > nr_bytes)) {
  				bio_nbytes += nr_bytes;
  				total_bytes += nr_bytes;
  				break;
  			}
  
  			/*
  			 * advance to the next vector
  			 */
  			next_idx++;
  			bio_nbytes += nbytes;
  		}
  
  		total_bytes += nbytes;
  		nr_bytes -= nbytes;
6728cb0e6   Jens Axboe   block: make core ...
1505
1506
  		bio = req->bio;
  		if (bio) {
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
  			/*
  			 * end more in this run, or just return 'not-done'
  			 */
  			if (unlikely(nr_bytes <= 0))
  				break;
  		}
  	}
  
  	/*
  	 * completely done
  	 */
  	if (!req->bio)
  		return 0;
  
  	/*
  	 * if the request wasn't completed, update state
  	 */
  	if (bio_nbytes) {
5bb23a688   NeilBrown   Don't decrement b...
1525
  		req_bio_endio(req, bio, bio_nbytes, error);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1526
1527
1528
1529
1530
1531
1532
1533
1534
  		bio->bi_idx += next_idx;
  		bio_iovec(bio)->bv_offset += nr_bytes;
  		bio_iovec(bio)->bv_len -= nr_bytes;
  	}
  
  	blk_recalc_rq_sectors(req, total_bytes >> 9);
  	blk_recalc_rq_segments(req);
  	return 1;
  }
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1535
  /*
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1536
1537
1538
1539
1540
   * splice the completion data to a local structure and hand off to
   * process_completion_queue() to complete the requests
   */
  static void blk_done_softirq(struct softirq_action *h)
  {
626ab0e69   Oleg Nesterov   [PATCH] list: use...
1541
  	struct list_head *cpu_list, local_list;
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1542
1543
1544
  
  	local_irq_disable();
  	cpu_list = &__get_cpu_var(blk_cpu_done);
626ab0e69   Oleg Nesterov   [PATCH] list: use...
1545
  	list_replace_init(cpu_list, &local_list);
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1546
1547
1548
  	local_irq_enable();
  
  	while (!list_empty(&local_list)) {
6728cb0e6   Jens Axboe   block: make core ...
1549
  		struct request *rq;
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1550

6728cb0e6   Jens Axboe   block: make core ...
1551
  		rq = list_entry(local_list.next, struct request, donelist);
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1552
1553
1554
1555
  		list_del_init(&rq->donelist);
  		rq->q->softirq_done_fn(rq);
  	}
  }
6728cb0e6   Jens Axboe   block: make core ...
1556
1557
/*
 * CPU hotplug callback: when a CPU dies, adopt its pending block
 * completions on the current CPU so they are not lost.
 */
static int __cpuinit blk_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;

		local_irq_disable();
		list_splice_init(&per_cpu(blk_cpu_done, cpu),
				 &__get_cpu_var(blk_cpu_done));
		raise_softirq_irqoff(BLOCK_SOFTIRQ);
		local_irq_enable();
	}

	return NOTIFY_OK;
}
db47d4753   Satyam Sharma   ll_rw_blk: blk_cp...
1575
/* Hotplug notifier wiring blk_cpu_notify into the CPU notifier chain. */
static struct notifier_block blk_cpu_notifier __cpuinitdata = {
	.notifier_call	= blk_cpu_notify,
};
ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1578
1579
1580
1581
1582
1583
/**
 * blk_complete_request - end I/O on a request
 * @req:      the request being processed
 *
 * Description:
 *     Ends all I/O on a request. It does not handle partial completions,
 *     unless the driver actually implements this in its completion callback
 *     through requeueing. The actual completion happens out-of-order,
 *     through a softirq handler. The user must have registered a completion
 *     callback through blk_queue_softirq_done().
 **/

void blk_complete_request(struct request *req)
{
	struct list_head *cpu_list;
	unsigned long flags;

	BUG_ON(!req->q->softirq_done_fn);

	/* Queue onto this CPU's done-list and kick BLOCK_SOFTIRQ;
	 * the actual ->softirq_done_fn runs from blk_done_softirq(). */
	local_irq_save(flags);

	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_add_tail(&req->donelist, cpu_list);
	raise_softirq_irqoff(BLOCK_SOFTIRQ);

	local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);
6728cb0e6   Jens Axboe   block: make core ...
1606

ff856bad6   Jens Axboe   [BLOCK] ll_rw_blk...
1607
/*
 * Final stage of request completion: drop tags, dequeue, account the
 * I/O, and either invoke the request's ->end_io or put it back to the
 * free pool (including the paired bidi request, if any).
 *
 * queue lock must be held
 */
static void end_that_request_last(struct request *req, int error)
{
	struct gendisk *disk = req->rq_disk;

	if (blk_rq_tagged(req))
		blk_queue_end_tag(req->q, req);

	if (blk_queued_rq(req))
		blkdev_dequeue_request(req);

	if (unlikely(laptop_mode) && blk_fs_request(req))
		laptop_io_completion();

	/*
	 * Account IO completion.  bar_rq isn't accounted as a normal
	 * IO on queueing nor completion.  Accounting the containing
	 * request is enough.
	 */
	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
		unsigned long duration = jiffies - req->start_time;
		const int rw = rq_data_dir(req);

		__disk_stat_inc(disk, ios[rw]);
		__disk_stat_add(disk, ticks[rw], duration);
		disk_round_stats(disk);
		disk->in_flight--;
	}

	if (req->end_io)
		req->end_io(req, error);
	else {
		/* Bidi: release the paired request before this one. */
		if (blk_bidi_rq(req))
			__blk_put_request(req->next_rq->q, req->next_rq);

		__blk_put_request(req->q, req);
	}
}
a0cd12854   Jens Axboe   block: add end_qu...
1645
  static inline void __end_request(struct request *rq, int uptodate,
9e6e39f2c   Kiyoshi Ueda   blk_end_request: ...
1646
  				 unsigned int nr_bytes)
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1647
  {
9e6e39f2c   Kiyoshi Ueda   blk_end_request: ...
1648
1649
1650
1651
1652
1653
  	int error = 0;
  
  	if (uptodate <= 0)
  		error = uptodate ? uptodate : -EIO;
  
  	__blk_end_request(rq, error, nr_bytes);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1654
  }
3b11313a6   Kiyoshi Ueda   blk_end_request: ...
1655
1656
1657
1658
  /**
   * blk_rq_bytes - Returns bytes left to complete in the entire request
   **/
  unsigned int blk_rq_bytes(struct request *rq)
a0cd12854   Jens Axboe   block: add end_qu...
1659
1660
1661
1662
1663
1664
  {
  	if (blk_fs_request(rq))
  		return rq->hard_nr_sectors << 9;
  
  	return rq->data_len;
  }
3b11313a6   Kiyoshi Ueda   blk_end_request: ...
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
  EXPORT_SYMBOL_GPL(blk_rq_bytes);
  
  /**
   * blk_rq_cur_bytes - Returns bytes left to complete in the current segment
   **/
  unsigned int blk_rq_cur_bytes(struct request *rq)
  {
  	if (blk_fs_request(rq))
  		return rq->current_nr_sectors << 9;
  
  	if (rq->bio)
  		return rq->bio->bi_size;
  
  	return rq->data_len;
  }
  EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
a0cd12854   Jens Axboe   block: add end_qu...
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
  
/**
 * end_queued_request - end all I/O on a queued request
 * @rq:		the request being processed
 * @uptodate:	error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends all I/O on a request, and removes it from the block layer queues.
 *     Not suitable for normal IO completion, unless the driver still has
 *     the request attached to the block layer.
 *
 **/
void end_queued_request(struct request *rq, int uptodate)
{
	/* Complete the full remaining size; __end_request translates
	 * the legacy uptodate flag into an error code. */
	__end_request(rq, uptodate, blk_rq_bytes(rq));
}
EXPORT_SYMBOL(end_queued_request);
  
/**
 * end_dequeued_request - end all I/O on a dequeued request
 * @rq:		the request being processed
 * @uptodate:	error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends all I/O on a request. The request must already have been
 *     dequeued using blkdev_dequeue_request(), as is normally the case
 *     for most drivers.
 *
 **/
void end_dequeued_request(struct request *rq, int uptodate)
{
	/* Same completion path as end_queued_request(); the request is
	 * simply no longer on the queue when we get here. */
	__end_request(rq, uptodate, blk_rq_bytes(rq));
}
EXPORT_SYMBOL(end_dequeued_request);
  
  
/**
 * end_request - end I/O on the current segment of the request
 * @req:	the request being processed
 * @uptodate:	error value or 0/1 uptodate flag
 *
 * Description:
 *     Ends I/O on the current segment of a request. If that is the only
 *     remaining segment, the request is also completed and freed.
 *
 *     This is a remnant of how older block drivers handled IO completions.
 *     Modern drivers typically end IO on the full request in one go, unless
 *     they have a residual value to account for. For that case this function
 *     isn't really useful, unless the residual just happens to be the
 *     full current segment. In other words, don't use this function in new
 *     code. Either use end_request_completely(), or the
 *     end_that_request_chunk() (along with end_that_request_last()) for
 *     partial completions.
 *
 **/
void end_request(struct request *req, int uptodate)
{
	/* Complete only the current (hard) segment, in bytes. */
	__end_request(req, uptodate, req->hard_cur_sectors << 9);
}
EXPORT_SYMBOL(end_request);
336cdb400   Kiyoshi Ueda   blk_end_request: ...
1741
/**
 * blk_end_io - Generic end_io function to complete a request.
 * @rq:           the request being processed
 * @error:        0 for success, < 0 for error
 * @nr_bytes:     number of bytes to complete @rq
 * @bidi_bytes:   number of bytes to complete @rq->next_rq
 * @drv_callback: function called between completion of bios in the request
 *                and completion of the request.
 *                If the callback returns non 0, this helper returns without
 *                completion of the request.
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 * Return:
 *     0 - we are done with this request
 *     1 - this request is not freed yet, it still has pending buffers.
 **/
static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
		      unsigned int bidi_bytes,
		      int (drv_callback)(struct request *))
{
	struct request_queue *q = rq->q;
	unsigned long flags = 0UL;

	if (blk_fs_request(rq) || blk_pc_request(rq)) {
		/* Non-zero return: leftover bytes, request not done yet. */
		if (__end_that_request_first(rq, error, nr_bytes))
			return 1;

		/* Bidi request must be completed as a whole */
		if (blk_bidi_rq(rq) &&
		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
			return 1;
	}

	/* Special feature for tricky drivers */
	if (drv_callback && drv_callback(rq))
		return 1;

	add_disk_randomness(rq->rq_disk);

	/* end_that_request_last() requires the queue lock held. */
	spin_lock_irqsave(q->queue_lock, flags);
	end_that_request_last(rq, error);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
e19a3ab05   Kiyoshi Ueda   blk_end_request: ...
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
  
  /**
   * blk_end_request - Helper function for drivers to complete the request.
   * @rq:       the request being processed
   * @error:    0 for success, < 0 for error
   * @nr_bytes: number of bytes to complete
   *
   * Description:
   *     Ends I/O on a number of bytes attached to @rq.
   *     If @rq has leftover, sets it up for the next range of segments.
   *
   * Return:
   *     0 - we are done with this request
   *     1 - still buffers pending for this request
   **/
22b132102   Jens Axboe   block: new end re...
1802
  int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
e19a3ab05   Kiyoshi Ueda   blk_end_request: ...
1803
  {
e3a04fe34   Kiyoshi Ueda   blk_end_request: ...
1804
  	return blk_end_io(rq, error, nr_bytes, 0, NULL);
e19a3ab05   Kiyoshi Ueda   blk_end_request: ...
1805
  }
336cdb400   Kiyoshi Ueda   blk_end_request: ...
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
  EXPORT_SYMBOL_GPL(blk_end_request);
  
/**
 * __blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    0 for success, < 0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Must be called with queue lock held unlike blk_end_request().
 *
 * Return:
 *     0 - we are done with this request
 *     1 - still buffers pending for this request
 **/
int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
	if (blk_fs_request(rq) || blk_pc_request(rq)) {
		/* Leftover bytes: request stays pending. */
		if (__end_that_request_first(rq, error, nr_bytes))
			return 1;
	}

	add_disk_randomness(rq->rq_disk);

	/* Caller already holds the queue lock, so no locking here. */
	end_that_request_last(rq, error);

	return 0;
}
EXPORT_SYMBOL_GPL(__blk_end_request);
e19a3ab05   Kiyoshi Ueda   blk_end_request: ...
1834
/**
 * blk_end_bidi_request - Helper function for drivers to complete bidi request.
 * @rq:         the bidi request being processed
 * @error:      0 for success, < 0 for error
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *
 * Return:
 *     0 - we are done with this request
 *     1 - still buffers pending for this request
 **/
int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
			 unsigned int bidi_bytes)
{
	/* Delegate to the generic completion core; no driver callback. */
	return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL);
}
EXPORT_SYMBOL_GPL(blk_end_bidi_request);
  
/**
 * blk_end_request_callback - Special helper function for tricky drivers
 * @rq:           the request being processed
 * @error:        0 for success, < 0 for error
 * @nr_bytes:     number of bytes to complete
 * @drv_callback: function called between completion of bios in the request
 *                and completion of the request.
 *                If the callback returns non 0, this helper returns without
 *                completion of the request.
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is used only for existing tricky drivers.
 *     (e.g. cdrom_newpc_intr() of ide-cd)
 *     This interface will be removed when such drivers are rewritten.
 *     Don't use this interface in other places anymore.
 *
 * Return:
 *     0 - we are done with this request
 *     1 - this request is not freed yet.
 *         this request still has pending buffers or
 *         the driver doesn't want to finish this request yet.
 **/
int blk_end_request_callback(struct request *rq, int error,
			     unsigned int nr_bytes,
			     int (drv_callback)(struct request *))
{
	/* Same as blk_end_request() but with the driver hook wired in. */
	return blk_end_io(rq, error, nr_bytes, 0, drv_callback);
}
EXPORT_SYMBOL_GPL(blk_end_request_callback);
86db1e297   Jens Axboe   block: continue l...
1887
1888
/*
 * blk_rq_bio_prep - initialize a request from a single bio: copy the
 * direction bits, derive segment/sector counts, and attach the bio.
 */
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
		     struct bio *bio)
{
	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
	rq->cmd_flags |= (bio->bi_rw & 3);

	rq->nr_phys_segments = bio_phys_segments(q, bio);
	rq->nr_hw_segments = bio_hw_segments(q, bio);
	rq->current_nr_sectors = bio_cur_sectors(bio);
	rq->hard_cur_sectors = rq->current_nr_sectors;
	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
	rq->buffer = bio_data(bio);
	rq->data_len = bio->bi_size;

	/* Single-bio request: head and tail are the same bio. */
	rq->bio = rq->biotail = bio;

	if (bio->bi_bdev)
		rq->rq_disk = bio->bi_bdev->bd_disk;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1906
1907
1908
1909
1910
  
/* Queue @work on the block layer's private kblockd workqueue. */
int kblockd_schedule_work(struct work_struct *work)
{
	return queue_work(kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work);
19a75d83f   Andrew Morton   kblockd: use flus...
1912
/*
 * Despite the "flush" name, this cancels @work and waits for any
 * in-flight execution to finish (cancel_work_sync semantics).
 */
void kblockd_flush_work(struct work_struct *work)
{
	cancel_work_sync(work);
}
EXPORT_SYMBOL(kblockd_flush_work);
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1917
1918
1919
  
/*
 * Boot-time initialization of the block layer: the kblockd workqueue,
 * the request/queue slab caches, and the per-cpu completion softirq.
 */
int __init blk_dev_init(void)
{
	int i;

	kblockd_workqueue = create_workqueue("kblockd");
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");

	/* SLAB_PANIC: allocation failure here is fatal at boot. */
	request_cachep = kmem_cache_create("blkdev_requests",
			sizeof(struct request), 0, SLAB_PANIC, NULL);

	blk_requestq_cachep = kmem_cache_create("blkdev_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

	/* Per-cpu lists feeding blk_done_softirq(). */
	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
	register_hotcpu_notifier(&blk_cpu_notifier);

	return 0;
}
1da177e4c   Linus Torvalds   Linux-2.6.12-rc2
1940