block/blk-flush.c

/*
 * Functions to sequence FLUSH and FUA writes.
 *
 * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
 * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * REQ_{FLUSH|FUA} requests are decomposed into sequences consisting of three
 * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
 * properties and hardware capability.
 *
 * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
 * indicates a simple flush request.  If there is data, REQ_PREFLUSH indicates
 * that the device cache should be flushed before the data is executed, and
 * REQ_FUA means that the data must be on non-volatile media on request
 * completion.
 *
 * If the device doesn't have a writeback cache, FLUSH and FUA don't make any
 * difference.  The requests are either completed immediately if there's no
 * data or executed as normal requests otherwise.
 *
 * If the device has a writeback cache and supports FUA, REQ_PREFLUSH is
 * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
 *
 * If the device has a writeback cache and doesn't support FUA, REQ_PREFLUSH
 * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
 *
 * The actual execution of flush is double buffered.  Whenever a request
 * needs to execute PRE or POSTFLUSH, it queues at
 * fq->flush_queue[fq->flush_pending_idx].  Once certain criteria are met, a
 * REQ_OP_FLUSH is issued and the pending_idx is toggled.  When the flush
 * completes, all the requests which were pending proceed to the next
 * step.  This allows arbitrary merging of different types of FLUSH/FUA
 * requests.
 *
 * Currently, the following conditions are used to determine when to issue
 * a flush.
 *
 * C1. At any given time, only one flush shall be in progress.  This makes
 *     double buffering sufficient.
 *
 * C2. Flush is deferred if any request is executing DATA of its sequence.
 *     This avoids issuing separate POSTFLUSHes for requests which shared
 *     a PREFLUSH.
 *
 * C3. The second condition is ignored if there is a request which has
 *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
 *     starvation in the unlikely case where there is a continuous stream of
 *     FUA (without FLUSH) requests.
 *
 * For devices which support FUA, it isn't clear whether C2 (and thus C3)
 * is beneficial.
 *
 * Note that a sequenced FLUSH/FUA request with DATA is completed twice:
 * once while executing DATA and again after the whole sequence is
 * complete.  The first completion updates the contained bio but doesn't
 * finish it so that the bio submitter is notified only after the whole
 * sequence is complete.  This is implemented by testing RQF_FLUSH_SEQ in
 * req_bio_endio().
 *
 * The above peculiarity requires that each FLUSH/FUA request has only one
 * bio attached to it, which is guaranteed as they aren't allowed to be
 * merged in the usual way.
 */
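
/*
 * Illustrative example (not part of the upstream file): a caller that wants
 * "flush the cache, then write this block durably" submits a single bio
 * carrying both flags and lets the machinery below decompose it.  A minimal
 * sketch, assuming a bio that has already been set up:
 *
 *	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA;
 *	submit_bio(bio);
 *
 * On a writeback-cache device without FUA support this expands to the
 * sequence PREFLUSH -> DATA -> POSTFLUSH; with FUA support the POSTFLUSH
 * step is dropped and REQ_FUA is passed down with the data; without a
 * writeback cache both flags are stripped and the data is written directly.
 */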

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"

/* FLUSH/FUA sequences */
enum {
	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
	REQ_FSEQ_DONE		= (1 << 3),

	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
				  REQ_FSEQ_POSTFLUSH,

	/*
	 * If flush has been pending longer than the following timeout,
	 * it's issued even if flush_data requests are still in flight.
	 */
	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
};

static bool blk_kick_flush(struct request_queue *q,
			   struct blk_flush_queue *fq);

static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
{
	unsigned int policy = 0;

	if (blk_rq_sectors(rq))
		policy |= REQ_FSEQ_DATA;

	if (fflags & (1UL << QUEUE_FLAG_WC)) {
		if (rq->cmd_flags & REQ_PREFLUSH)
			policy |= REQ_FSEQ_PREFLUSH;
		if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
		    (rq->cmd_flags & REQ_FUA))
			policy |= REQ_FSEQ_POSTFLUSH;
	}
	return policy;
}
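
/*
 * Worked example (illustrative, not from the upstream file): for a queue
 * with QUEUE_FLAG_WC set but QUEUE_FLAG_FUA clear, a request carrying data
 * plus REQ_PREFLUSH | REQ_FUA decomposes into all three steps:
 *
 *	unsigned long fflags = 1UL << QUEUE_FLAG_WC;
 *	unsigned int policy = blk_flush_policy(fflags, rq);
 *	// policy == REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH
 *
 * If QUEUE_FLAG_FUA were also set, REQ_FSEQ_POSTFLUSH would be dropped and
 * REQ_FUA would be left on the request for the driver to honour.
 */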

static unsigned int blk_flush_cur_seq(struct request *rq)
{
	return 1 << ffz(rq->flush.seq);
}
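
/*
 * Example of the ffz() stepping (illustrative, not from the upstream file):
 * rq->flush.seq collects the bits of the steps already completed, so the
 * lowest clear bit names the next step to run.
 *
 *	rq->flush.seq == 0                 -> next is 1 << 0 == REQ_FSEQ_PREFLUSH
 *	rq->flush.seq == REQ_FSEQ_PREFLUSH -> next is 1 << 1 == REQ_FSEQ_DATA
 *	rq->flush.seq == PREFLUSH | DATA   -> next is 1 << 2 == REQ_FSEQ_POSTFLUSH
 *
 * blk_flush_complete_seq() also pre-marks the steps a request skips (via
 * REQ_FSEQ_ACTIONS & ~policy), so a request that needs no POSTFLUSH goes
 * straight from DATA to REQ_FSEQ_DONE.
 */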

static void blk_flush_restore_request(struct request *rq)
{
	/*
	 * After flush data completion, @rq->bio is %NULL but we need to
	 * complete the bio again.  @rq->biotail is guaranteed to equal the
	 * original @rq->bio.  Restore it.
	 */
	rq->bio = rq->biotail;

	/* make @rq a normal request */
	rq->rq_flags &= ~RQF_FLUSH_SEQ;
	rq->end_io = rq->flush.saved_end_io;
}

static bool blk_flush_queue_rq(struct request *rq, bool add_front)
{
	if (rq->q->mq_ops) {
		blk_mq_add_to_requeue_list(rq, add_front, true);
		return false;
	} else {
		if (add_front)
			list_add(&rq->queuelist, &rq->q->queue_head);
		else
			list_add_tail(&rq->queuelist, &rq->q->queue_head);
		return true;
	}
}

/**
 * blk_flush_complete_seq - complete flush sequence
 * @rq: FLUSH/FUA request being sequenced
 * @fq: flush queue
 * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
 * @error: whether an error occurred
 *
 * @rq has just completed the @seq part of its flush sequence.  Record the
 * completion and trigger the next step.
 *
 * CONTEXT:
 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
 *
 * RETURNS:
 * %true if requests were added to the dispatch queue, %false otherwise.
 */
static bool blk_flush_complete_seq(struct request *rq,
				   struct blk_flush_queue *fq,
				   unsigned int seq, int error)
{
	struct request_queue *q = rq->q;
	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
	bool queued = false, kicked;

	BUG_ON(rq->flush.seq & seq);
	rq->flush.seq |= seq;

	if (likely(!error))
		seq = blk_flush_cur_seq(rq);
	else
		seq = REQ_FSEQ_DONE;

	switch (seq) {
	case REQ_FSEQ_PREFLUSH:
	case REQ_FSEQ_POSTFLUSH:
		/* queue for flush */
		if (list_empty(pending))
			fq->flush_pending_since = jiffies;
		list_move_tail(&rq->flush.list, pending);
		break;

	case REQ_FSEQ_DATA:
		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
		queued = blk_flush_queue_rq(rq, true);
		break;

	case REQ_FSEQ_DONE:
		/*
		 * @rq was previously adjusted by blk_flush_issue() for
		 * flush sequencing and may already have gone through the
		 * flush data request completion path.  Restore @rq for
		 * normal completion and end it.
		 */
		BUG_ON(!list_empty(&rq->queuelist));
		list_del_init(&rq->flush.list);
		blk_flush_restore_request(rq);
		if (q->mq_ops)
			blk_mq_end_request(rq, error);
		else
			__blk_end_request_all(rq, error);
		break;

	default:
		BUG();
	}

	kicked = blk_kick_flush(q, fq);
	return kicked | queued;
}

static void flush_end_io(struct request *flush_rq, int error)
{
	struct request_queue *q = flush_rq->q;
	struct list_head *running;
	bool queued = false;
	struct request *rq, *n;
	unsigned long flags = 0;
	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);

	if (q->mq_ops) {
		struct blk_mq_hw_ctx *hctx;

		/* release the tag's ownership to the req cloned from */
		spin_lock_irqsave(&fq->mq_flush_lock, flags);
		hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
		blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
		flush_rq->tag = -1;
	}

	running = &fq->flush_queue[fq->flush_running_idx];
	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);

	/* account completion of the flush request */
	fq->flush_running_idx ^= 1;

	if (!q->mq_ops)
		elv_completed_request(q, flush_rq);

	/* and push the waiting requests to the next stage */
	list_for_each_entry_safe(rq, n, running, flush.list) {
		unsigned int seq = blk_flush_cur_seq(rq);

		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
		queued |= blk_flush_complete_seq(rq, fq, seq, error);
	}

	/*
	 * Kick the queue to avoid a stall in two cases:
	 * 1. Moving a request silently to an empty queue_head may stall
	 * the queue.
	 * 2. When a flush request is running in a non-queueable queue, the
	 * queue is held.  Restart the queue after the flush request is
	 * finished to avoid a stall.
	 * This function is called from the request completion path and
	 * calling directly into request_fn may confuse the driver.  Always
	 * use kblockd.
	 */
	if (queued || fq->flush_queue_delayed) {
		WARN_ON(q->mq_ops);
		blk_run_queue_async(q);
	}
	fq->flush_queue_delayed = 0;
	if (q->mq_ops)
		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}

/**
 * blk_kick_flush - consider issuing flush request
 * @q: request_queue being kicked
 * @fq: flush queue
 *
 * Flush-related states of @q have changed; consider issuing a flush request.
 * Please read the comment at the top of this file for more info.
 *
 * CONTEXT:
 * spin_lock_irq(q->queue_lock or fq->mq_flush_lock)
 *
 * RETURNS:
 * %true if flush was issued, %false otherwise.
 */
static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
{
	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
	struct request *first_rq =
		list_first_entry(pending, struct request, flush.list);
	struct request *flush_rq = fq->flush_rq;

	/* C1 described at the top of this file */
	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
		return false;

	/* C2 and C3 */
	if (!list_empty(&fq->flush_data_in_flight) &&
	    time_before(jiffies,
			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
		return false;

	/*
	 * Issue flush and toggle pending_idx.  This makes pending_idx
	 * different from running_idx, which means flush is in flight.
	 */
	fq->flush_pending_idx ^= 1;

	blk_rq_init(q, flush_rq);

	/*
	 * Borrow the tag from the first request since they can't be in
	 * flight at the same time, and acquire the tag's ownership for
	 * the flush req.
	 */
	if (q->mq_ops) {
		struct blk_mq_hw_ctx *hctx;

		flush_rq->mq_ctx = first_rq->mq_ctx;
		flush_rq->tag = first_rq->tag;
		fq->orig_rq = first_rq;

		hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
		blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
	}

	flush_rq->cmd_type = REQ_TYPE_FS;
	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
	flush_rq->rq_disk = first_rq->rq_disk;
	flush_rq->end_io = flush_end_io;

	return blk_flush_queue_rq(flush_rq, false);
}

static void flush_data_end_io(struct request *rq, int error)
{
	struct request_queue *q = rq->q;
	struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);

	/*
	 * Update q->in_flight[] here to make this tag usable early,
	 * because in blk_queue_start_tag() q->in_flight[BLK_RW_ASYNC]
	 * is used to limit async I/O and reserve tags for sync I/O.
	 *
	 * More importantly, this avoids the following I/O deadlock:
	 *
	 * - suppose there are 40 FUA requests coming to the flush queue
	 *   and the queue depth is 31
	 * - 30 rqs are scheduled, then blk_queue_start_tag() can't
	 *   allocate a tag for async I/O any more
	 * - all 30 rqs are completed before FLUSH_PENDING_TIMEOUT
	 *   and flush_data_end_io() is called
	 * - without updating q->in_flight[BLK_RW_ASYNC] here, the other
	 *   rqs still can't go ahead; meanwhile these rqs are held in
	 *   the flush data queue and the post-flush rq makes no progress
	 * - only after the post-flush rq is handled can all these rqs
	 *   be completed
	 */

	elv_completed_request(q, rq);

	/* for avoiding double accounting */
	rq->rq_flags &= ~RQF_STARTED;

	/*
	 * After populating an empty queue, kick it to avoid stall.  Read
	 * the comment in flush_end_io().
	 */
	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
		blk_run_queue_async(q);
}

static void mq_flush_data_end_io(struct request *rq, int error)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	unsigned long flags;
	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);

	hctx = blk_mq_map_queue(q, ctx->cpu);

	/*
	 * After populating an empty queue, kick it to avoid stall.  Read
	 * the comment in flush_end_io().
	 */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);
	if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error))
		blk_mq_run_hw_queue(hctx, true);
	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
}

/**
 * blk_insert_flush - insert a new FLUSH/FUA request
 * @rq: request to insert
 *
 * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions,
 * or from __blk_mq_run_hw_queue() to dispatch the request.
 * @rq is being submitted.  Analyze what needs to be done and put it on the
 * right queue.
 *
 * CONTEXT:
 * spin_lock_irq(q->queue_lock) in !mq case
 */
void blk_insert_flush(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long fflags = q->queue_flags;	/* may change, cache */
	unsigned int policy = blk_flush_policy(fflags, rq);
	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);

	/*
	 * @policy now records what operations need to be done.  Adjust
	 * REQ_PREFLUSH and FUA for the driver.
	 */
	rq->cmd_flags &= ~REQ_PREFLUSH;
	if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
		rq->cmd_flags &= ~REQ_FUA;

	/*
	 * REQ_PREFLUSH|REQ_FUA implies REQ_SYNC, so if we clear any
	 * of those flags, we have to set REQ_SYNC to avoid skewing
	 * the request accounting.
	 */
	rq->cmd_flags |= REQ_SYNC;

	/*
	 * An empty flush handed down from a stacking driver may
	 * translate into nothing if the underlying device does not
	 * advertise a write-back cache.  In this case, simply
	 * complete the request.
	 */
	if (!policy) {
		if (q->mq_ops)
			blk_mq_end_request(rq, 0);
		else
			__blk_end_bidi_request(rq, 0, 0, 0);
		return;
	}

	BUG_ON(rq->bio != rq->biotail); /* assumes zero or single bio rq */

	/*
	 * If there's data but flush is not necessary, the request can be
	 * processed directly without going through flush machinery.  Queue
	 * for normal execution.
	 */
	if ((policy & REQ_FSEQ_DATA) &&
	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
		if (q->mq_ops) {
			blk_mq_insert_request(rq, false, true, false);
		} else
			list_add_tail(&rq->queuelist, &q->queue_head);
		return;
	}

	/*
	 * @rq should go through flush machinery.  Mark it part of flush
	 * sequence and submit for further processing.
	 */
	memset(&rq->flush, 0, sizeof(rq->flush));
	INIT_LIST_HEAD(&rq->flush.list);
	rq->rq_flags |= RQF_FLUSH_SEQ;
	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
	if (q->mq_ops) {
		rq->end_io = mq_flush_data_end_io;

		spin_lock_irq(&fq->mq_flush_lock);
		blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
		spin_unlock_irq(&fq->mq_flush_lock);
		return;
	}
	rq->end_io = flush_data_end_io;

	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
}
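
/*
 * Illustrative mapping of the three branches above (not part of the
 * upstream file):
 *
 *	empty flush (REQ_PREFLUSH, no data), device without writeback cache
 *		-> policy == 0, the request is completed immediately
 *	REQ_FUA write with data, device with writeback cache and FUA support
 *		-> policy == REQ_FSEQ_DATA only, queued for normal execution
 *		   with REQ_FUA left set for the driver
 *	REQ_PREFLUSH|REQ_FUA write with data, writeback cache without FUA
 *		-> policy == PREFLUSH | DATA | POSTFLUSH, enters the flush
 *		   machinery via blk_flush_complete_seq()
 */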

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.  The flush is submitted via submit_bio_wait(), so it has
 *    completed by the time this function returns.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		sector_t *error_sector)
{
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file) and so issuing a flush
	 * here will panic. Ensure there is a request function before issuing
	 * the flush.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	bio->bi_bdev = bdev;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	ret = submit_bio_wait(bio);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be
	 * copied from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_iter.bi_sector;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
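
/*
 * Usage sketch (illustrative, not part of the upstream file): a filesystem
 * or driver that needs the device cache drained, e.g. at sync time, can
 * call the exported helper directly; "sb" here is a hypothetical superblock:
 *
 *	sector_t err_sect;
 *	int err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, &err_sect);
 *	if (err)
 *		pr_warn("cache flush failed: %d\n", err);
 *
 * Passing NULL for @error_sector is fine when the caller does not care
 * where the failure happened.
 */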

struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
		int node, int cmd_size)
{
	struct blk_flush_queue *fq;
	int rq_sz = sizeof(struct request);

	fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
	if (!fq)
		goto fail;

	if (q->mq_ops) {
		spin_lock_init(&fq->mq_flush_lock);
		rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
	}

	fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
	if (!fq->flush_rq)
		goto fail_rq;

	INIT_LIST_HEAD(&fq->flush_queue[0]);
	INIT_LIST_HEAD(&fq->flush_queue[1]);
	INIT_LIST_HEAD(&fq->flush_data_in_flight);

	return fq;

 fail_rq:
	kfree(fq);
 fail:
	return NULL;
}

void blk_free_flush_queue(struct blk_flush_queue *fq)
{
	/* bio based request queues don't have a flush queue */
	if (!fq)
		return;

	kfree(fq->flush_rq);
	kfree(fq);
}