block/blk-barrier.c

  /*
   * Functions related to barrier IO handling
   */
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  
  #include "blk.h"
  
  /**
   * blk_queue_ordered - does this queue support ordered writes
   * @q:        the request queue
   * @ordered:  one of QUEUE_ORDERED_*
   * @prepare_flush_fn: rq setup helper for cache flush ordered writes
   *
   * Description:
   *   For journalled file systems, doing ordered writes on a commit
   *   block instead of explicitly doing wait_on_buffer (which is bad
   *   for performance) can be a big win. Block drivers supporting this
   *   feature should call this function and indicate so.
   *
   **/
  int blk_queue_ordered(struct request_queue *q, unsigned ordered,
  		      prepare_flush_fn *prepare_flush_fn)
  {
  	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
  					     QUEUE_ORDERED_DO_POSTFLUSH))) {
  		printk(KERN_ERR "%s: prepare_flush_fn required
  ", __func__);
  		return -EINVAL;
  	}
  
  	if (ordered != QUEUE_ORDERED_NONE &&
  	    ordered != QUEUE_ORDERED_DRAIN &&
  	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
  	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
  	    ordered != QUEUE_ORDERED_TAG &&
  	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
  	    ordered != QUEUE_ORDERED_TAG_FUA) {
  		printk(KERN_ERR "blk_queue_ordered: bad value %d
  ", ordered);
  		return -EINVAL;
  	}
  
  	q->ordered = ordered;
  	q->next_ordered = ordered;
  	q->prepare_flush_fn = prepare_flush_fn;
  
  	return 0;
  }
  EXPORT_SYMBOL(blk_queue_ordered);
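
  /*
   * Example (illustrative sketch only, not used by this file): a driver
   * whose device has a volatile write cache could advertise ordered-write
   * support from its init path.  my_prepare_flush() is a hypothetical
   * callback that fills @rq with the device-specific cache flush command:
   *
   *	static void my_prepare_flush(struct request_queue *q,
   *				     struct request *rq)
   *	{
   *		fill in the device-specific cache flush command for rq
   *	}
   *
   *	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, my_prepare_flush);
   *
   * A write-through device that only needs draining can pass
   * QUEUE_ORDERED_DRAIN with a NULL prepare_flush_fn instead.
   */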
  
  /*
   * Cache flushing for ordered writes handling
   */
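
  /*
   * A barrier sequence is tracked in q->ordseq as a bitmask of the
   * QUEUE_ORDSEQ_* stages that have already finished (or been skipped);
   * the mask is zero while no sequence is in flight.
   * blk_ordered_cur_seq() reports the lowest stage still pending, and
   * blk_ordered_complete_seq() records finished stages, ending the
   * original barrier request once the whole sequence is done.
   */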
  unsigned blk_ordered_cur_seq(struct request_queue *q)
  {
  	if (!q->ordseq)
  		return 0;
  	return 1 << ffz(q->ordseq);
  }
  
  unsigned blk_ordered_req_seq(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  
  	BUG_ON(q->ordseq == 0);
  
  	if (rq == &q->pre_flush_rq)
  		return QUEUE_ORDSEQ_PREFLUSH;
  	if (rq == &q->bar_rq)
  		return QUEUE_ORDSEQ_BAR;
  	if (rq == &q->post_flush_rq)
  		return QUEUE_ORDSEQ_POSTFLUSH;
  
  	/*
  	 * !fs requests don't need to follow barrier ordering.  Always
  	 * put them at the front.  This fixes the following deadlock.
  	 *
  	 * http://thread.gmane.org/gmane.linux.kernel/537473
  	 */
  	if (!blk_fs_request(rq))
  		return QUEUE_ORDSEQ_DRAIN;
  
  	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
  	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
  		return QUEUE_ORDSEQ_DRAIN;
  	else
  		return QUEUE_ORDSEQ_DONE;
  }
  bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
  {
  	struct request *rq;
  
  	if (error && !q->orderr)
  		q->orderr = error;
  
  	BUG_ON(q->ordseq & seq);
  	q->ordseq |= seq;
  
  	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
  		return false;
  
  	/*
  	 * Okay, sequence complete.
  	 */
  	q->ordseq = 0;
  	rq = q->orig_bar_rq;
  
  	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
  		BUG();
  
  	return true;
  }
  
  static void pre_flush_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
  }
  
  static void bar_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
  }
  
  static void post_flush_end_io(struct request *rq, int error)
  {
  	elv_completed_request(rq->q, rq);
  	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
  }
  
  static void queue_flush(struct request_queue *q, unsigned which)
  {
  	struct request *rq;
  	rq_end_io_fn *end_io;
  	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
  		rq = &q->pre_flush_rq;
  		end_io = pre_flush_end_io;
  	} else {
  		rq = &q->post_flush_rq;
  		end_io = post_flush_end_io;
  	}
  	blk_rq_init(q, rq);
  	rq->cmd_flags = REQ_HARDBARRIER;
  	rq->rq_disk = q->bar_rq.rq_disk;
  	rq->end_io = end_io;
  	q->prepare_flush_fn(q, rq);
  
  	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
  }
  static inline bool start_ordered(struct request_queue *q, struct request **rqp)
  {
  	struct request *rq = *rqp;
  	unsigned skip = 0;
  	q->orderr = 0;
  	q->ordered = q->next_ordered;
  	q->ordseq |= QUEUE_ORDSEQ_STARTED;
  	/*
  	 * For an empty barrier, there's no actual BAR request, which
  	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
  	 */
  	if (!rq->hard_nr_sectors) {
  		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
  				QUEUE_ORDERED_DO_POSTFLUSH);
  		/*
  		 * Empty barrier on a write-through device w/ ordered
  		 * tag has no command to issue and without any command
  		 * to issue, ordering by tag can't be used.  Drain
  		 * instead.
  		 */
  		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
  		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
  			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
  			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
  		}
  	}

  	/* stash away the original request */
  	elv_dequeue_request(q, rq);
  	q->orig_bar_rq = rq;
  	rq = NULL;
  
  	/*
  	 * Queue ordered sequence.  As we stack them at the head, we
  	 * need to queue in reverse order.  Note that we rely on that
  	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
  	 * request gets in between the ordered sequence.
  	 */
  	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
  		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
  		rq = &q->post_flush_rq;
  	} else
  		skip |= QUEUE_ORDSEQ_POSTFLUSH;

  	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
  		rq = &q->bar_rq;
  
  		/* initialize proxy request and queue it */
  		blk_rq_init(q, rq);
  		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
  			rq->cmd_flags |= REQ_RW;
  		if (q->ordered & QUEUE_ORDERED_DO_FUA)
  			rq->cmd_flags |= REQ_FUA;
  		init_request_from_bio(rq, q->orig_bar_rq->bio);
  		rq->end_io = bar_end_io;
  
  		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
  	} else
  		skip |= QUEUE_ORDSEQ_BAR;

  	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
  		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
  		rq = &q->pre_flush_rq;
  	} else
  		skip |= QUEUE_ORDSEQ_PREFLUSH;

  	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
  		rq = NULL;
  	else
  		skip |= QUEUE_ORDSEQ_DRAIN;

  	*rqp = rq;
  
  	/*
  	 * Complete skipped sequences.  If whole sequence is complete,
  	 * return false to tell elevator that this request is gone.
  	 */
  	return !blk_ordered_complete_seq(q, skip, 0);
  }
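
  /*
   * blk_do_ordered() is the hook the elevator/dispatch path uses to route
   * requests through barrier processing.  It returns false when the request
   * is gone (terminated with an error or already completed as part of a
   * barrier sequence); otherwise it returns true, possibly replacing *rqp
   * with the next request of the ordered sequence, or with NULL to hold
   * dispatch back for now.
   */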
  bool blk_do_ordered(struct request_queue *q, struct request **rqp)
  {
  	struct request *rq = *rqp;
  	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
  
  	if (!q->ordseq) {
  		if (!is_barrier)
  			return true;

  		if (q->next_ordered != QUEUE_ORDERED_NONE)
  			return start_ordered(q, rqp);
  		else {
  			/*
  			 * Queue ordering not supported.  Terminate
  			 * with prejudice.
  			 */
  			elv_dequeue_request(q, rq);
  			if (__blk_end_request(rq, -EOPNOTSUPP,
  					      blk_rq_bytes(rq)))
  				BUG();
  			*rqp = NULL;
  			return false;
  		}
  	}
  
  	/*
  	 * Ordered sequence in progress
  	 */
  
  	/* Special requests are not subject to ordering rules. */
  	if (!blk_fs_request(rq) &&
  	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
  		return true;

  	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
  		/* Ordered by tag.  Blocking the next barrier is enough. */
  		if (is_barrier && rq != &q->bar_rq)
  			*rqp = NULL;
  	} else {
  		/* Ordered by draining.  Wait for turn. */
  		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
  		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
  			*rqp = NULL;
  	}
  	return true;
  }
  
  static void bio_end_empty_barrier(struct bio *bio, int err)
  {
  	if (err) {
  		if (err == -EOPNOTSUPP)
  			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
  	}
  
  	complete(bio->bi_private);
  }
  
  /**
   * blkdev_issue_flush - queue a flush
   * @bdev:	blockdev to issue flush for
   * @error_sector:	error sector
   *
   * Description:
   *    Issue a flush for the block device in question. Caller can supply
   *    room for storing the error offset in case of a flush error, if they
   *    wish to.  The request is issued and waited upon before returning.
   */
  int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
  {
  	DECLARE_COMPLETION_ONSTACK(wait);
  	struct request_queue *q;
  	struct bio *bio;
  	int ret;
  
  	if (bdev->bd_disk == NULL)
  		return -ENXIO;
  
  	q = bdev_get_queue(bdev);
  	if (!q)
  		return -ENXIO;
  
  	bio = bio_alloc(GFP_KERNEL, 0);
  	if (!bio)
  		return -ENOMEM;
  
  	bio->bi_end_io = bio_end_empty_barrier;
  	bio->bi_private = &wait;
  	bio->bi_bdev = bdev;
  	submit_bio(WRITE_BARRIER, bio);
  
  	wait_for_completion(&wait);
  
  	/*
  	 * The driver must store the error location in ->bi_sector, if
  	 * it supports it. For non-stacked drivers, this should be copied
  	 * from rq->sector.
  	 */
  	if (error_sector)
  		*error_sector = bio->bi_sector;
  
  	ret = 0;
  	if (bio_flagged(bio, BIO_EOPNOTSUPP))
  		ret = -EOPNOTSUPP;
  	else if (!bio_flagged(bio, BIO_UPTODATE))
  		ret = -EIO;
  
  	bio_put(bio);
  	return ret;
  }
  EXPORT_SYMBOL(blkdev_issue_flush);
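
  /*
   * Example (illustrative sketch): a caller that has just written critical
   * metadata could force the device's write cache to stable storage with
   * something like the following; err_sector is a hypothetical local:
   *
   *	sector_t err_sector;
   *	int err = blkdev_issue_flush(bdev, &err_sector);
   *
   * -EOPNOTSUPP means the device does not support barrier flushes; any
   * other error indicates the flush itself failed, and err_sector may
   * point at the failing offset if the driver filled it in.  Passing
   * NULL instead of &err_sector is fine when the caller does not care
   * about the error offset.
   */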
  
  static void blkdev_discard_end_io(struct bio *bio, int err)
  {
  	if (err) {
  		if (err == -EOPNOTSUPP)
  			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
  		clear_bit(BIO_UPTODATE, &bio->bi_flags);
  	}
  
  	bio_put(bio);
  }
  
  /**
   * blkdev_issue_discard - queue a discard
   * @bdev:	blockdev to issue discard for
   * @sector:	start sector
   * @nr_sects:	number of sectors to discard
   * @gfp_mask:	memory allocation flags (for bio_alloc)
   *
   * Description:
   *    Issue a discard request for the sectors in question. Does not wait.
   */
  int blkdev_issue_discard(struct block_device *bdev,
  			 sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
  {
  	struct request_queue *q;
  	struct bio *bio;
  	int ret = 0;
  
  	if (bdev->bd_disk == NULL)
  		return -ENXIO;
  
  	q = bdev_get_queue(bdev);
  	if (!q)
  		return -ENXIO;
  
  	if (!q->prepare_discard_fn)
  		return -EOPNOTSUPP;
  
  	while (nr_sects && !ret) {
  		bio = bio_alloc(gfp_mask, 0);
  		if (!bio)
  			return -ENOMEM;
  
  		bio->bi_end_io = blkdev_discard_end_io;
  		bio->bi_bdev = bdev;
  
  		bio->bi_sector = sector;
  
  		if (nr_sects > q->max_hw_sectors) {
  			bio->bi_size = q->max_hw_sectors << 9;
  			nr_sects -= q->max_hw_sectors;
  			sector += q->max_hw_sectors;
  		} else {
  			bio->bi_size = nr_sects << 9;
  			nr_sects = 0;
  		}
  		bio_get(bio);
  		submit_bio(DISCARD_BARRIER, bio);
  
  		/* Check if it failed immediately */
  		if (bio_flagged(bio, BIO_EOPNOTSUPP))
  			ret = -EOPNOTSUPP;
  		else if (!bio_flagged(bio, BIO_UPTODATE))
  			ret = -EIO;
  		bio_put(bio);
  	}
  	return ret;
  }
  EXPORT_SYMBOL(blkdev_issue_discard);
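
  /*
   * Example (illustrative sketch): a filesystem freeing a range of blocks
   * it no longer needs could pass that hint down to the device with
   * something like the following (start_sector and nr_sectors are
   * hypothetical locals):
   *
   *	int err = blkdev_issue_discard(bdev, start_sector, nr_sectors,
   *				       GFP_KERNEL);
   *
   * -EOPNOTSUPP is returned when the queue has no prepare_discard_fn;
   * since discard is only a hint, callers typically treat that as
   * non-fatal.  The discard bios are submitted without waiting for
   * completion, as noted in the description above.
   */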