block/blk-barrier.c
  /*
   * Functions related to barrier IO handling
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  
  #include "blk.h"
  
  /**
   * blk_queue_ordered - does this queue support ordered writes
   * @q:        the request queue
   * @ordered:  one of QUEUE_ORDERED_*
   * @prepare_flush_fn: rq setup helper for cache flush ordered writes
   *
   * Description:
   *   For journalled file systems, doing ordered writes on a commit
   *   block instead of explicitly doing wait_on_buffer (which is bad
   *   for performance) can be a big win. Block drivers supporting this
   *   feature should call this function and indicate so.
   *
   **/
  int blk_queue_ordered(struct request_queue *q, unsigned ordered,
  		      prepare_flush_fn *prepare_flush_fn)
  {
  	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
  					     QUEUE_ORDERED_DO_POSTFLUSH))) {
  		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
  		return -EINVAL;
  	}
  
  	if (ordered != QUEUE_ORDERED_NONE &&
  	    ordered != QUEUE_ORDERED_DRAIN &&
  	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
  	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
  	    ordered != QUEUE_ORDERED_TAG &&
  	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
  	    ordered != QUEUE_ORDERED_TAG_FUA) {
  		printk(KERN_ERR "blk_queue_ordered: bad value %d
  ", ordered);
  		return -EINVAL;
  	}
  
  	q->ordered = ordered;
  	q->next_ordered = ordered;
  	q->prepare_flush_fn = prepare_flush_fn;
  
  	return 0;
  }
  EXPORT_SYMBOL(blk_queue_ordered);
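  /*
   * Usage sketch (illustrative only, not part of this file): a driver for a
   * device with a volatile write cache would register a flush setup helper
   * and pick an ordering mode while initializing its queue.  The names
   * example_prepare_flush() and example_init_queue() are hypothetical.
   */
  #if 0
  static void example_prepare_flush(struct request_queue *q, struct request *rq)
  {
  	/*
  	 * Fill @rq with whatever command makes this particular device
  	 * flush its volatile write cache (e.g. SYNCHRONIZE CACHE on SCSI).
  	 */
  }
  
  static int example_init_queue(struct request_queue *q)
  {
  	/* Drain the queue and flush the cache around each barrier write. */
  	return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
  				 example_prepare_flush);
  }
  #endif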
  
  /*
   * Cache flushing for ordered writes handling
   */
  unsigned blk_ordered_cur_seq(struct request_queue *q)
  {
  	if (!q->ordseq)
  		return 0;
  	return 1 << ffz(q->ordseq);
  }
  
  unsigned blk_ordered_req_seq(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  
  	BUG_ON(q->ordseq == 0);
  
  	if (rq == &q->pre_flush_rq)
  		return QUEUE_ORDSEQ_PREFLUSH;
  	if (rq == &q->bar_rq)
  		return QUEUE_ORDSEQ_BAR;
  	if (rq == &q->post_flush_rq)
  		return QUEUE_ORDSEQ_POSTFLUSH;
  
  	/*
  	 * !fs requests don't need to follow barrier ordering.  Always
  	 * put them at the front.  This fixes the following deadlock.
  	 *
  	 * http://thread.gmane.org/gmane.linux.kernel/537473
  	 */
  	if (!blk_fs_request(rq))
  		return QUEUE_ORDSEQ_DRAIN;
  
  	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
  	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
  		return QUEUE_ORDSEQ_DRAIN;
  	else
  		return QUEUE_ORDSEQ_DONE;
  }
  bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
  {
  	struct request *rq;
  
  	if (error && !q->orderr)
  		q->orderr = error;
  
  	BUG_ON(q->ordseq & seq);
  	q->ordseq |= seq;
  
  	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
  		return false;
  
  	/*
  	 * Okay, sequence complete.
  	 */
  	q->ordseq = 0;
  	rq = q->orig_bar_rq;
  	__blk_end_request_all(rq, q->orderr);
  	return true;
  }
  
  static void pre_flush_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
  }
  
  static void bar_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
  }
  
  static void post_flush_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
  }
  
  static void queue_flush(struct request_queue *q, unsigned which)
  {
  	struct request *rq;
  	rq_end_io_fn *end_io;
  	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
  		rq = &q->pre_flush_rq;
  		end_io = pre_flush_end_io;
  	} else {
  		rq = &q->post_flush_rq;
  		end_io = post_flush_end_io;
  	}
  	blk_rq_init(q, rq);
  	rq->cmd_flags = REQ_HARDBARRIER;
  	rq->rq_disk = q->bar_rq.rq_disk;
  	rq->end_io = end_io;
  	q->prepare_flush_fn(q, rq);
  
  	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
  }
  static inline bool start_ordered(struct request_queue *q, struct request **rqp)
  {
  	struct request *rq = *rqp;
  	unsigned skip = 0;
  	q->orderr = 0;
  	q->ordered = q->next_ordered;
  	q->ordseq |= QUEUE_ORDSEQ_STARTED;
  	/*
  	 * For an empty barrier, there's no actual BAR request, which
  	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
  	 */
  	if (!blk_rq_sectors(rq)) {
  		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
  				QUEUE_ORDERED_DO_POSTFLUSH);
  		/*
  		 * Empty barrier on a write-through device w/ ordered
  		 * tag has no command to issue and without any command
  		 * to issue, ordering by tag can't be used.  Drain
  		 * instead.
  		 */
  		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
  		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
  			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
  			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
  		}
  	}

  	/* stash away the original request */
  	blk_dequeue_request(rq);
  	q->orig_bar_rq = rq;
  	rq = NULL;
  
  	/*
  	 * Queue ordered sequence.  As we stack them at the head, we
  	 * need to queue in reverse order.  Note that we rely on the fact
  	 * that no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
  	 * request gets in between the ordered sequence.
  	 */
  	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
  		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
  		rq = &q->post_flush_rq;
  	} else
  		skip |= QUEUE_ORDSEQ_POSTFLUSH;

  	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
  		rq = &q->bar_rq;
  
  		/* initialize proxy request and queue it */
  		blk_rq_init(q, rq);
  		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
  			rq->cmd_flags |= REQ_RW;
  		if (q->ordered & QUEUE_ORDERED_DO_FUA)
  			rq->cmd_flags |= REQ_FUA;
  		init_request_from_bio(rq, q->orig_bar_rq->bio);
  		rq->end_io = bar_end_io;
  
  		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
  	} else
  		skip |= QUEUE_ORDSEQ_BAR;
  
  	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
  		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
  		rq = &q->pre_flush_rq;
  	} else
  		skip |= QUEUE_ORDSEQ_PREFLUSH;
  
  	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
  		rq = NULL;
  	else
  		skip |= QUEUE_ORDSEQ_DRAIN;

  	*rqp = rq;
  
  	/*
  	 * Complete skipped sequences.  If the whole sequence is complete,
  	 * return false to tell the elevator that this request is gone.
  	 */
  	return !blk_ordered_complete_seq(q, skip, 0);
  }
  bool blk_do_ordered(struct request_queue *q, struct request **rqp)
  {
  	struct request *rq = *rqp;
  	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
  
  	if (!q->ordseq) {
  		if (!is_barrier)
  			return true;

  		if (q->next_ordered != QUEUE_ORDERED_NONE)
  			return start_ordered(q, rqp);
  		else {
  			/*
  			 * Queue ordering not supported.  Terminate
  			 * with prejudice.
  			 */
  			blk_dequeue_request(rq);
  			__blk_end_request_all(rq, -EOPNOTSUPP);
  			*rqp = NULL;
  			return false;
  		}
  	}
  
  	/*
  	 * Ordered sequence in progress
  	 */
  
  	/* Special requests are not subject to ordering rules. */
  	if (!blk_fs_request(rq) &&
  	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
  		return true;

  	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
  		/* Ordered by tag.  Blocking the next barrier is enough. */
  		if (is_barrier && rq != &q->bar_rq)
  			*rqp = NULL;
  	} else {
  		/* Ordered by draining.  Wait for turn. */
  		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
  		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
  			*rqp = NULL;
  	}
  	return true;
  }
  
  static void bio_end_empty_barrier(struct bio *bio, int err)
  {
  	if (err) {
  		if (err == -EOPNOTSUPP)
  			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
  	}
  
  	complete(bio->bi_private);
  }
  
  /**
   * blkdev_issue_flush - queue a flush
   * @bdev:	blockdev to issue flush for
   * @error_sector:	error sector
   *
   * Description:
   *    Issue a flush for the block device in question. Caller can supply
   *    room for storing the error offset in case of a flush error, if they
   *    wish to.
   */
  int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
  {
  	DECLARE_COMPLETION_ONSTACK(wait);
  	struct request_queue *q;
  	struct bio *bio;
  	int ret;
  
  	if (bdev->bd_disk == NULL)
  		return -ENXIO;
  
  	q = bdev_get_queue(bdev);
  	if (!q)
  		return -ENXIO;
  
  	bio = bio_alloc(GFP_KERNEL, 0);
  	bio->bi_end_io = bio_end_empty_barrier;
  	bio->bi_private = &wait;
  	bio->bi_bdev = bdev;
  	submit_bio(WRITE_BARRIER, bio);
  
  	wait_for_completion(&wait);
  
  	/*
  	 * The driver must store the error location in ->bi_sector, if
  	 * it supports it. For non-stacked drivers, this should be copied
  	 * from blk_rq_pos(rq).
  	 */
  	if (error_sector)
  		*error_sector = bio->bi_sector;
  
  	ret = 0;
  	if (bio_flagged(bio, BIO_EOPNOTSUPP))
  		ret = -EOPNOTSUPP;
  	else if (!bio_flagged(bio, BIO_UPTODATE))
  		ret = -EIO;
  
  	bio_put(bio);
  	return ret;
  }
  EXPORT_SYMBOL(blkdev_issue_flush);
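  /*
   * Usage sketch (illustrative only, not part of this file): a caller that
   * has already written its data back can force it to stable storage like
   * this.  example_flush_device() is a hypothetical name.
   */
  #if 0
  static int example_flush_device(struct block_device *bdev)
  {
  	sector_t error_sector;
  	int err;
  
  	err = blkdev_issue_flush(bdev, &error_sector);
  	if (err == -EOPNOTSUPP)
  		err = 0;	/* no flush support: nothing to be done */
  	return err;
  }
  #endif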
  
  static void blkdev_discard_end_io(struct bio *bio, int err)
  {
  	if (err) {
  		if (err == -EOPNOTSUPP)
  			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
  	}
  	if (bio->bi_private)
  		complete(bio->bi_private);
  	__free_page(bio_page(bio));

  	bio_put(bio);
  }
  
  /**
   * blkdev_issue_discard - queue a discard
   * @bdev:	blockdev to issue discard for
   * @sector:	start sector
   * @nr_sects:	number of sectors to discard
   * @gfp_mask:	memory allocation flags (for bio_alloc)
   * @flags:	DISCARD_FL_* flags to control behaviour
   *
   * Description:
   *    Issue a discard request for the sectors in question.
   */
  int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
  		sector_t nr_sects, gfp_t gfp_mask, int flags)
  {
  	DECLARE_COMPLETION_ONSTACK(wait);
  	struct request_queue *q = bdev_get_queue(bdev);
  	int type = flags & DISCARD_FL_BARRIER ?
  		DISCARD_BARRIER : DISCARD_NOBARRIER;
  	struct bio *bio;
  	struct page *page;
  	int ret = 0;
  	if (!q)
  		return -ENXIO;
  	if (!blk_queue_discard(q))
  		return -EOPNOTSUPP;
  
  	while (nr_sects && !ret) {
  		unsigned int sector_size = q->limits.logical_block_size;
  		unsigned int max_discard_sectors =
  			min(q->limits.max_discard_sectors, UINT_MAX >> 9);

  		bio = bio_alloc(gfp_mask, 1);
  		if (!bio)
  			goto out;
  		bio->bi_sector = sector;
  		bio->bi_end_io = blkdev_discard_end_io;
  		bio->bi_bdev = bdev;
  		if (flags & DISCARD_FL_WAIT)
  			bio->bi_private = &wait;

  		/*
  		 * Add a zeroed one-sector payload as that's what
  		 * our current implementations need.  If we ever need
  		 * more, the interface will need revisiting.
  		 */
  		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
  		if (!page)
  			goto out_free_bio;
  		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
  			goto out_free_page;

  		/*
  		 * And override the bio size - the way discard works, we
  		 * touch many more blocks on disk than the actual payload
  		 * length.
  		 */
  		if (nr_sects > max_discard_sectors) {
  			bio->bi_size = max_discard_sectors << 9;
  			nr_sects -= max_discard_sectors;
  			sector += max_discard_sectors;
  		} else {
  			bio->bi_size = nr_sects << 9;
  			nr_sects = 0;
  		}

  		bio_get(bio);
  		submit_bio(type, bio);
  
  		if (flags & DISCARD_FL_WAIT)
  			wait_for_completion(&wait);

  		if (bio_flagged(bio, BIO_EOPNOTSUPP))
  			ret = -EOPNOTSUPP;
  		else if (!bio_flagged(bio, BIO_UPTODATE))
  			ret = -EIO;
  		bio_put(bio);
  	}
  	return ret;
  out_free_page:
  	__free_page(page);
  out_free_bio:
  	bio_put(bio);
  out:
  	return -ENOMEM;
  }
  EXPORT_SYMBOL(blkdev_issue_discard);
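  /*
   * Usage sketch (illustrative only, not part of this file): a filesystem
   * freeing a range of blocks could discard it and wait for completion.
   * example_trim_range() is a hypothetical name.
   */
  #if 0
  static int example_trim_range(struct block_device *bdev, sector_t start,
  			      sector_t nr_sects)
  {
  	/* Synchronous discard, no barrier semantics requested. */
  	return blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL,
  				    DISCARD_FL_WAIT);
  }
  #endif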