  // SPDX-License-Identifier: GPL-2.0
  /*
   * Functions to sequence PREFLUSH and FUA writes.
   *
   * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
   * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
   *
   * REQ_{PREFLUSH|FUA} requests are decomposed to sequences consisting of three
   * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
   * properties and hardware capability.
   *
   * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
   * indicates a simple flush request.  If there is data, REQ_PREFLUSH indicates
   * that the device cache should be flushed before the data is executed, and
   * REQ_FUA means that the data must be on non-volatile media on request
   * completion.
   *
   * If the device doesn't have a writeback cache, PREFLUSH and FUA don't make
   * any difference.  The requests are either completed immediately if there's
   * no data or executed as normal requests otherwise.
   *
   * If the device has a writeback cache and supports FUA, REQ_PREFLUSH is
   * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
   *
   * If the device has a writeback cache and doesn't support FUA, REQ_PREFLUSH
   * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
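   *
   * In short, for a request carrying data (a summary of the rules above):
   *
   *	device cache		REQ_PREFLUSH		REQ_FUA
   *	none			ignored			ignored
   *	writeback + FUA		PREFLUSH step		passed down with DATA
   *	writeback, no FUA	PREFLUSH step		POSTFLUSH step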
   *
   * The actual execution of flush is double buffered.  Whenever a request
   * needs to execute PRE or POSTFLUSH, it queues at
   * fq->flush_queue[fq->flush_pending_idx].  Once certain criteria are met, a
   * REQ_OP_FLUSH is issued and the pending_idx is toggled.  When the flush
   * completes, all the requests which were pending proceed to the next
   * step.  This allows arbitrary merging of different types of PREFLUSH/FUA
   * requests.
   *
   * Currently, the following conditions are used to determine when to issue
   * a flush.
   *
   * C1. At any given time, only one flush shall be in progress.  This makes
   *     double buffering sufficient.
   *
   * C2. Flush is deferred if any request is executing DATA of its sequence.
   *     This avoids issuing separate POSTFLUSHes for requests which shared
   *     PREFLUSH.
   *
   * C3. The second condition is ignored if there is a request which has
   *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
   *     starvation in the unlikely case where there is a continuous stream
   *     of FUA (without PREFLUSH) requests.
   *
   * For devices which support FUA, it isn't clear whether C2 (and thus C3)
   * is beneficial.
   *
   * Note that a sequenced PREFLUSH/FUA request with DATA is completed twice:
   * once while executing DATA and again after the whole sequence is
   * complete.  The first completion updates the contained bio but doesn't
   * finish it so that the bio submitter is notified only after the whole
   * sequence is complete.  This is implemented by testing RQF_FLUSH_SEQ in
   * req_bio_endio().
   *
   * The above peculiarity requires that each PREFLUSH/FUA request has only one
   * bio attached to it, which is guaranteed as they aren't allowed to be
   * merged in the usual way.
   */

  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/gfp.h>
  #include <linux/blk-mq.h>

  #include "blk.h"
  #include "blk-mq.h"
  #include "blk-mq-tag.h"
  #include "blk-mq-sched.h"

  /* PREFLUSH/FUA sequences */
  enum {
  	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
  	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
  	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
  	REQ_FSEQ_DONE		= (1 << 3),
  
  	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
  				  REQ_FSEQ_POSTFLUSH,
  
  	/*
  	 * If flush has been pending longer than the following timeout,
  	 * it's issued even if flush_data requests are still in flight.
  	 */
  	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
  };
  static void blk_kick_flush(struct request_queue *q,
  			   struct blk_flush_queue *fq, unsigned int flags);

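  /*
   * Work out which of the PREFLUSH, DATA and POSTFLUSH steps @rq needs,
   * based on whether it carries data and on the queue's writeback-cache
   * and FUA capability bits in @fflags.
   */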
  static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
  {
  	unsigned int policy = 0;

  	if (blk_rq_sectors(rq))
  		policy |= REQ_FSEQ_DATA;
  	if (fflags & (1UL << QUEUE_FLAG_WC)) {
  		if (rq->cmd_flags & REQ_PREFLUSH)
  			policy |= REQ_FSEQ_PREFLUSH;
  		if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
  		    (rq->cmd_flags & REQ_FUA))
  			policy |= REQ_FSEQ_POSTFLUSH;
  	}
  	return policy;
  }
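
  /*
   * Return the next pending step of @rq's flush sequence, i.e. the lowest
   * REQ_FSEQ_* bit not yet set in rq->flush.seq.
   */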
  static unsigned int blk_flush_cur_seq(struct request *rq)
  {
  	return 1 << ffz(rq->flush.seq);
  }

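  /*
   * Turn a request that went through flush sequencing back into a normal
   * request so it can be completed the usual way.
   */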
  static void blk_flush_restore_request(struct request *rq)
  {
  	/*
  	 * After flush data completion, @rq->bio is %NULL but we need to
  	 * complete the bio again.  @rq->biotail is guaranteed to equal the
  	 * original @rq->bio.  Restore it.
  	 */
  	rq->bio = rq->biotail;
  
  	/* make @rq a normal request */
  	rq->rq_flags &= ~RQF_FLUSH_SEQ;
  	rq->end_io = rq->flush.saved_end_io;
  }
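
  /*
   * Queue @rq for dispatch via the requeue list and kick that list;
   * @add_front selects head vs. tail insertion.
   */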
  static void blk_flush_queue_rq(struct request *rq, bool add_front)
  {
  	blk_mq_add_to_requeue_list(rq, add_front, true);
  }
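
  /*
   * Account a completed flush against the whole disk (part0): bump the
   * STAT_FLUSH io count and add the elapsed time.
   */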
  static void blk_account_io_flush(struct request *rq)
  {
  	struct block_device *part = rq->rq_disk->part0;
  
  	part_stat_lock();
  	part_stat_inc(part, ios[STAT_FLUSH]);
  	part_stat_add(part, nsecs[STAT_FLUSH],
  		      ktime_get_ns() - rq->start_time_ns);
  	part_stat_unlock();
  }

  /**
   * blk_flush_complete_seq - complete flush sequence
   * @rq: PREFLUSH/FUA request being sequenced
   * @fq: flush queue
   * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
   * @error: whether an error occurred
   *
   * @rq just completed @seq part of its flush sequence, record the
   * completion and trigger the next step.
   *
   * CONTEXT:
   * spin_lock_irq(fq->mq_flush_lock)
   */
  static void blk_flush_complete_seq(struct request *rq,
  				   struct blk_flush_queue *fq,
  				   unsigned int seq, blk_status_t error)
  {
  	struct request_queue *q = rq->q;
  	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
  	unsigned int cmd_flags;
  
  	BUG_ON(rq->flush.seq & seq);
  	rq->flush.seq |= seq;
  	cmd_flags = rq->cmd_flags;
  
  	if (likely(!error))
  		seq = blk_flush_cur_seq(rq);
  	else
  		seq = REQ_FSEQ_DONE;
  
  	switch (seq) {
  	case REQ_FSEQ_PREFLUSH:
  	case REQ_FSEQ_POSTFLUSH:
  		/* queue for flush */
  		if (list_empty(pending))
  			fq->flush_pending_since = jiffies;
  		list_move_tail(&rq->flush.list, pending);
  		break;
  
  	case REQ_FSEQ_DATA:
  		list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
  		blk_flush_queue_rq(rq, true);
  		break;
  
  	case REQ_FSEQ_DONE:
  		/*
  		 * @rq was previously adjusted by blk_insert_flush() for
  		 * flush sequencing and may already have gone through the
  		 * flush data request completion path.  Restore @rq for
  		 * normal completion and end it.
  		 */
  		BUG_ON(!list_empty(&rq->queuelist));
  		list_del_init(&rq->flush.list);
  		blk_flush_restore_request(rq);
  		blk_mq_end_request(rq, error);
  		break;
  
  	default:
  		BUG();
  	}
  	blk_kick_flush(q, fq, cmd_flags);
  }
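
  /*
   * Completion handler for the flush request itself.  If this is not the
   * last reference (the timeout path may still hold one), just record the
   * status and return; otherwise release the borrowed tag, toggle
   * flush_running_idx and advance every request that was waiting on this
   * flush to its next sequence step.
   */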
  static void flush_end_io(struct request *flush_rq, blk_status_t error)
  {
  	struct request_queue *q = flush_rq->q;
  	struct list_head *running;
  	struct request *rq, *n;
  	unsigned long flags = 0;
  	struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);

  	/* release the tag's ownership to the req cloned from */
  	spin_lock_irqsave(&fq->mq_flush_lock, flags);
  
  	if (!refcount_dec_and_test(&flush_rq->ref)) {
  		fq->rq_status = error;
  		spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
  		return;
  	}
  	blk_account_io_flush(flush_rq);
  	/*
  	 * The flush request has to be marked IDLE when it is really ended,
  	 * because its ->end_io() is also called from the timeout code path;
  	 * otherwise a use-after-free could occur.
  	 */
  	WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
  	if (fq->rq_status != BLK_STS_OK) {
  		error = fq->rq_status;
  		fq->rq_status = BLK_STS_OK;
  	}

  	if (!q->elevator) {
  		flush_rq->tag = BLK_MQ_NO_TAG;
  	} else {
  		blk_mq_put_driver_tag(flush_rq);
  		flush_rq->internal_tag = BLK_MQ_NO_TAG;
  	}

  	running = &fq->flush_queue[fq->flush_running_idx];
  	BUG_ON(fq->flush_pending_idx == fq->flush_running_idx);
  
  	/* account completion of the flush request */
  	fq->flush_running_idx ^= 1;

  	/* and push the waiting requests to the next stage */
  	list_for_each_entry_safe(rq, n, running, flush.list) {
  		unsigned int seq = blk_flush_cur_seq(rq);
  
  		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
  		blk_flush_complete_seq(rq, fq, seq, error);
  	}
  	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
  }
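
  /* The flush request itself is identified by its end_io handler. */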
  bool is_flush_rq(struct request *rq)
  {
  	return rq->end_io == flush_end_io;
  }

  /**
   * blk_kick_flush - consider issuing flush request
   * @q: request_queue being kicked
   * @fq: flush queue
   * @flags: cmd_flags of the original request
   *
   * Flush related states of @q have changed, consider issuing flush request.
   * Please read the comment at the top of this file for more info.
   *
   * CONTEXT:
   * spin_lock_irq(fq->mq_flush_lock)
   */
  static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
  			   unsigned int flags)
  {
  	struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
  	struct request *first_rq =
  		list_first_entry(pending, struct request, flush.list);
  	struct request *flush_rq = fq->flush_rq;
  
  	/* C1 described at the top of this file */
  	if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
  		return;

  	/* C2 and C3 */
  	if (!list_empty(&fq->flush_data_in_flight) &&
  	    time_before(jiffies,
  			fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
  		return;
  
  	/*
  	 * Issue flush and toggle pending_idx.  This makes pending_idx
  	 * different from running_idx, which means flush is in flight.
  	 */
  	fq->flush_pending_idx ^= 1;

  	blk_rq_init(q, flush_rq);
  
  	/*
  	 * If there is no I/O scheduler, borrow the driver tag from the first
  	 * request, since the two can't be in flight at the same time, and
  	 * take over the tag's ownership for the flush request.
  	 *
  	 * If there is an I/O scheduler, the flush request only needs to borrow
  	 * the scheduler tag so that getting/putting the driver tag works as usual.
  	 */
  	flush_rq->mq_ctx = first_rq->mq_ctx;
  	flush_rq->mq_hctx = first_rq->mq_hctx;

  	if (!q->elevator) {
  		flush_rq->tag = first_rq->tag;
  
  		/*
  		 * We borrow the data request's driver tag, so we have to mark
  		 * this flush request as INFLIGHT to avoid double accounting
  		 * of that driver tag.
  		 */
  		flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
  	} else
  		flush_rq->internal_tag = first_rq->internal_tag;

  	flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
  	flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
  	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
  	flush_rq->rq_disk = first_rq->rq_disk;
  	flush_rq->end_io = flush_end_io;
  	/*
  	 * Order the WRITE of ->end_io before the WRITE of rq->ref.  The
  	 * pairing barrier is the one implied by refcount_inc_not_zero() in
  	 * blk_mq_find_and_get_req(), which orders the WRITE/READ of
  	 * flush_rq->ref against the READ of flush_rq->end_io.
  	 */
  	smp_wmb();
  	refcount_set(&flush_rq->ref, 1);

  	blk_flush_queue_rq(flush_rq, false);
  }
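
  /*
   * Completion handler for the DATA step of a sequenced request: release
   * the driver tag if an I/O scheduler is in use, record completion of the
   * DATA step (which may kick the next flush), then restart the hw queue.
   */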
  static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
  {
  	struct request_queue *q = rq->q;
  	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
  	struct blk_mq_ctx *ctx = rq->mq_ctx;
  	unsigned long flags;
  	struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);

  	if (q->elevator) {
  		WARN_ON(rq->tag < 0);
  		blk_mq_put_driver_tag(rq);
  	}
  	/*
  	 * After populating an empty queue, kick it to avoid stall.  Read
  	 * the comment in flush_end_io().
  	 */
  	spin_lock_irqsave(&fq->mq_flush_lock, flags);
  	blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
  	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);

  	blk_mq_sched_restart(hctx);
  }

  /**
   * blk_insert_flush - insert a new PREFLUSH/FUA request
   * @rq: request to insert
   *
   * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions,
   * or __blk_mq_run_hw_queue() to dispatch the request.
   *
   * @rq is being submitted.  Analyze what needs to be done and put it on the
   * right queue.
   */
  void blk_insert_flush(struct request *rq)
  {
  	struct request_queue *q = rq->q;
  	unsigned long fflags = q->queue_flags;	/* may change, cache */
  	unsigned int policy = blk_flush_policy(fflags, rq);
  	struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);

  	/*
  	 * @policy now records what operations need to be done.  Adjust
  	 * REQ_PREFLUSH and FUA for the driver.
  	 */
  	rq->cmd_flags &= ~REQ_PREFLUSH;
  	if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
  		rq->cmd_flags &= ~REQ_FUA;
  
  	/*
  	 * REQ_PREFLUSH|REQ_FUA implies REQ_SYNC, so if we clear any
  	 * of those flags, we have to set REQ_SYNC to avoid skewing
  	 * the request accounting.
  	 */
  	rq->cmd_flags |= REQ_SYNC;
  
  	/*
  	 * An empty flush handed down from a stacking driver may
  	 * translate into nothing if the underlying device does not
  	 * advertise a write-back cache.  In this case, simply
  	 * complete the request.
  	 */
  	if (!policy) {
  		blk_mq_end_request(rq, 0);
  		return;
  	}
  	BUG_ON(rq->bio != rq->biotail); /* assumes zero or single bio rq */
  
  	/*
  	 * If there's data but flush is not necessary, the request can be
  	 * processed directly without going through flush machinery.  Queue
  	 * for normal execution.
  	 */
  	if ((policy & REQ_FSEQ_DATA) &&
  	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
  		blk_mq_request_bypass_insert(rq, false, false);
  		return;
  	}

  	/*
  	 * @rq should go through flush machinery.  Mark it part of flush
  	 * sequence and submit for further processing.
  	 */
  	memset(&rq->flush, 0, sizeof(rq->flush));
  	INIT_LIST_HEAD(&rq->flush.list);
  	rq->rq_flags |= RQF_FLUSH_SEQ;
  	rq->flush.saved_end_io = rq->end_io; /* Usually NULL */

  	rq->end_io = mq_flush_data_end_io;

  	spin_lock_irq(&fq->mq_flush_lock);
  	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
  	spin_unlock_irq(&fq->mq_flush_lock);
  }

  /**
   * blkdev_issue_flush - queue a flush
   * @bdev:	blockdev to issue flush for
   *
   * Description:
   *    Issue a flush for the block device in question.
   */
  int blkdev_issue_flush(struct block_device *bdev)
  {
  	struct bio bio;

  	bio_init(&bio, NULL, 0);
  	bio_set_dev(&bio, bdev);
  	bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
  	return submit_bio_wait(&bio);
  }
  EXPORT_SYMBOL(blkdev_issue_flush);

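  /*
   * Allocate and initialise a blk_flush_queue, including the preallocated
   * flush request used by blk_kick_flush(); @cmd_size extra bytes are
   * allocated after the request itself.
   */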
  struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
  					      gfp_t flags)
  {
  	struct blk_flush_queue *fq;
  	int rq_sz = sizeof(struct request);

  	fq = kzalloc_node(sizeof(*fq), flags, node);
  	if (!fq)
  		goto fail;

  	spin_lock_init(&fq->mq_flush_lock);

  	rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
  	fq->flush_rq = kzalloc_node(rq_sz, flags, node);
  	if (!fq->flush_rq)
  		goto fail_rq;
  
  	INIT_LIST_HEAD(&fq->flush_queue[0]);
  	INIT_LIST_HEAD(&fq->flush_queue[1]);
  	INIT_LIST_HEAD(&fq->flush_data_in_flight);
  
  	return fq;
  
   fail_rq:
  	kfree(fq);
   fail:
  	return NULL;
  }

  void blk_free_flush_queue(struct blk_flush_queue *fq)
  {
  	/* bio based request queues don't have a flush queue */
  	if (!fq)
  		return;

  	kfree(fq->flush_rq);
  	kfree(fq);
  }
  
  /*
   * Allow a driver to set its own lock class for fq->mq_flush_lock in order
   * to avoid a lockdep complaint.
   *
   * flush_end_io() may be called recursively from some drivers, such as
   * nvme-loop, so lockdep may complain about 'possible recursive locking'
   * because all 'struct blk_flush_queue' instances share the same
   * mq_flush_lock lock class key.  Such drivers need to assign a different
   * lock class to their fq->mq_flush_lock to avoid the lockdep warning.
   *
   * Using a dynamically allocated lock class key for each 'blk_flush_queue'
   * instance would be overkill and, worse, it introduces a horrible boot
   * delay because synchronize_rcu() is implied in lockdep_unregister_key(),
   * which is called for each hctx release.  SCSI probing may synchronously
   * create and destroy lots of MQ request_queues for non-existent devices,
   * and some robot test kernels always enable the lockdep option.  It has
   * been observed that more than half an hour is taken during SCSI MQ probe
   * with a per-fq lock class.
   */
  void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
  		struct lock_class_key *key)
  {
  	lockdep_set_class(&hctx->fq->mq_flush_lock, key);
  }
  EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class);